From e0854c3f8328d06177b2026e59c51ed146180023 Mon Sep 17 00:00:00 2001
From: Daniel Veillard
Date: Sun, 27 Aug 2000 21:12:29 +0000
Subject: [PATCH] Bunch of new parser cleanup work: - SAX.c tree.c debugXML.c:
fixed bogus behaviour when an undeclared namespace prefix was used, added a
warning. Cleaned up support w.r.t. entities, spilling out a warning and
being pedantic on lookups. - test/warning/ent9 : added testcase for previous
example. - TODO: updated - parserInternals.h parser.c: changed the way names
are parsed now allow infinite size and decrease penalty for normal use -
parser.c: Started a big cleanup/check of the parser code, fixed some of the
most tortuous entity code, spotted code unused anymore - test/*: added
tests for very long names and related nasty things. Daniel
---
ChangeLog | 16 +
SAX.c | 8 +-
TODO | 11 +-
debugXML.c | 23 +-
include/libxml/parserInternals.h | 2 +-
parser.c | 609 ++++++++++++++++++++++---------
parserInternals.h | 2 +-
result/bigentname.xml | 6 +
result/bigname.xml | 2 +
result/bigname2.xml | 2 +
result/noent/bigentname.xml | 6 +
result/noent/bigname.xml | 2 +
result/noent/bigname2.xml | 2 +
result/noent/dtd12 | 2 +-
result/noent/tstblanks.xml | 2 +
result/noent/xml1 | 2 +-
result/tstblanks.xml | 2 +
test/bigentname.xml | 5 +
test/bigname.xml | 1 +
test/bigname2.xml | 1 +
test/tstblanks.xml | 495 +++++++++++++++++++++++++
test/warning/ent9 | 7 +
tree.c | 28 +-
23 files changed, 1038 insertions(+), 198 deletions(-)
create mode 100644 result/bigentname.xml
create mode 100644 result/bigname.xml
create mode 100644 result/bigname2.xml
create mode 100644 result/noent/bigentname.xml
create mode 100644 result/noent/bigname.xml
create mode 100644 result/noent/bigname2.xml
create mode 100644 result/noent/tstblanks.xml
create mode 100644 result/tstblanks.xml
create mode 100644 test/bigentname.xml
create mode 100644 test/bigname.xml
create mode 100644 test/bigname2.xml
create mode 100644 test/tstblanks.xml
create mode 100644 test/warning/ent9
diff --git a/ChangeLog b/ChangeLog
index 8a55c1b5..fefc457a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+Sun Aug 27 22:14:01 CEST 2000 Daniel Veillard
+
+ * SAX.c tree.c debugXML.c: fixed bogus behaviour when an
+ undeclared namespace prefix was used, added a warning.
+ Cleaned up support w.r.t. entities, spilling out a warning
+ and being pedantic on lookups.
+ * test/warning/ent9 : added testcase for previous example.
+ * TODO: updated
+ * parserInternals.h parser.c: changed the way names are parsed,
+ now allowing unlimited size and decreasing the penalty for normal use
+ * parser.c: Started a big cleanup/check of the parser code,
+ fixed some of the most tortuous entity code, spotted code
+ that is no longer used
+ * test/*: added tests for very long names and related nasty
+ things.
+
Sat Aug 26 23:31:04 CEST 2000 Daniel Veillard
* doc/encoding.html: added encoding aliases doc
diff --git a/SAX.c b/SAX.c
index 43e847db..3a7d9a9f 100644
--- a/SAX.c
+++ b/SAX.c
@@ -955,6 +955,12 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
ns = xmlSearchNs(ctxt->myDoc, ret, prefix);
if ((ns == NULL) && (parent != NULL))
ns = xmlSearchNs(ctxt->myDoc, parent, prefix);
+ if ((prefix != NULL) && (ns == NULL)) {
+ ns = xmlNewNs(ret, NULL, prefix);
+ if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
+ ctxt->sax->warning(ctxt->userData,
+ "Namespace prefix %s is not defined\n", prefix);
+ }
xmlSetNs(ret, ns);
/*
@@ -1121,7 +1127,7 @@ characters(void *ctx, const xmlChar *ch, int len)
}
#endif
} else {
- if (xmlNodeIsText(lastChild)) {
+ if ((xmlNodeIsText(lastChild)) && (ctxt->nodemem != 0)) {
#ifndef XML_USE_BUFFER_CONTENT
/*
* The whole point of maintaining nodelen and nodemem,
diff --git a/TODO b/TODO
index 246eb2da..1fd2e445 100644
--- a/TODO
+++ b/TODO
@@ -6,9 +6,14 @@
TODO:
=====
+- cleanup the mess with URI references when composing entities.
+- performance: there are still improvements needed when parsing the Docbook
+ DTD, a single function to optimize/avoid.
+- Move all deprecated functions to a different module, so that they can be
+ compiled out.
- DOM needs
- xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
int xmlPruneProp(xmlNodePtr node, xmlAtttrPtr attr);
+- listing all attributes in a node.
- General checking of DTD validation in presence of namespaces ... hairy
mostly done
- Fix DTD + namespace validity problem
@@ -20,7 +25,7 @@ TODO:
- Find way of representing PERefs in the Dtd so that %entity; can
be saved back.
- Go through erratas and do the cleanup.
- http://www.w3.org/XML/xml-19980210-errata ... bummmer
+ http://www.w3.org/XML/xml-19980210-errata ... started ...
- Handle undefined namespaces in entity contents better ... at least
issue a warning
- fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
@@ -95,6 +100,8 @@ EXTENSIONS:
Done:
=====
+- DOM needs
+ xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
- problem when parsing hrefs with & with the HTML parser (IRC ac)
- If the internal encoding is not UTF8 saving to a given encoding doesn't
work => fix to force UTF8 encoding ...
diff --git a/debugXML.c b/debugXML.c
index 7c00fb18..ddce5c5e 100644
--- a/debugXML.c
+++ b/debugXML.c
@@ -37,6 +37,10 @@
void xmlDebugDumpString(FILE *output, const xmlChar *str) {
int i;
+ if (str == NULL) {
+ fprintf(output, "(NULL)");
+ return;
+ }
for (i = 0;i < 40;i++)
if (str[i] == 0) return;
else if (IS_BLANK(str[i])) fputc(' ', output);
@@ -370,13 +374,20 @@ void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) {
fprintf(output, shift);
if (ns->type == XML_GLOBAL_NAMESPACE)
fprintf(output, "old ");
- if (ns->prefix != NULL)
- fprintf(output, "namespace %s href=", ns->prefix);
- else
- fprintf(output, "default namespace href=");
+ if (ns->href == NULL) {
+ if (ns->prefix != NULL)
+ fprintf(output, "incomplete namespace %s href=NULL\n", ns->prefix);
+ else
+ fprintf(output, "incomplete default namespace href=NULL\n");
+ } else {
+ if (ns->prefix != NULL)
+ fprintf(output, "namespace %s href=", ns->prefix);
+ else
+ fprintf(output, "default namespace href=");
- xmlDebugDumpString(output, ns->href);
- fprintf(output, "\n");
+ xmlDebugDumpString(output, ns->href);
+ fprintf(output, "\n");
+ }
}
void xmlDebugDumpNamespaceList(FILE *output, xmlNsPtr ns, int depth) {
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 8fd6ffc6..7956dd3c 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -15,7 +15,7 @@
extern "C" {
#endif
-#define XML_MAX_NAMELEN 1000
+#define XML_MAX_NAMELEN 100
/************************************************************************
* *
diff --git a/parser.c b/parser.c
index 253d9d48..0964881f 100644
--- a/parser.c
+++ b/parser.c
@@ -1,6 +1,14 @@
/*
* parser.c : an XML 1.0 non-verifying parser
*
+ * References:
+ * The XML specification:
+ * http://www.w3.org/TR/REC-xml
+ * Original 1.0 version:
+ * http://www.w3.org/TR/1998/REC-xml-19980210
+ * XML second edition working draft
+ * http://www.w3.org/TR/2000/WD-xml-2e-20000814
+ *
* See Copyright for the status of this software.
*
* Daniel.Veillard@w3.org
@@ -1176,9 +1184,13 @@ int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
int cur, res = 0;
+ /*
+ * It's okay to use CUR/NEXT here since all the blanks are in
+ * the ASCII range.
+ */
do {
cur = CUR;
- while (IS_BLANK(cur)) {
+ while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
NEXT;
cur = CUR;
res++;
@@ -1188,9 +1200,12 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
xmlPopInput(ctxt);
cur = CUR;
}
+ /*
+ * Need to handle support of entities branching here
+ */
if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
- } while (IS_BLANK(cur));
+ } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
return(res);
}
@@ -1609,10 +1624,10 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
if (ctxt == NULL) return;
- while ((input = inputPop(ctxt)) != NULL) {
+ while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
xmlFreeInputStream(input);
}
- while ((oldname = namePop(ctxt)) != NULL) {
+ while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
xmlFree(oldname);
}
if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
@@ -1706,21 +1721,26 @@ xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
int
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
int val = 0;
+ int count = 0;
if (ctxt->token != 0) {
val = ctxt->token;
ctxt->token = 0;
return(val);
}
+ /*
+ * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
+ */
if ((RAW == '&') && (NXT(1) == '#') &&
(NXT(2) == 'x')) {
SKIP(3);
- while (RAW != ';') {
- if ((RAW >= '0') && (RAW <= '9'))
+ GROW;
+ while (RAW != ';') { /* loop blocked by count */
+ if ((RAW >= '0') && (RAW <= '9') && (count < 20))
val = val * 16 + (CUR - '0');
- else if ((RAW >= 'a') && (RAW <= 'f'))
+ else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
val = val * 16 + (CUR - 'a') + 10;
- else if ((RAW >= 'A') && (RAW <= 'F'))
+ else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
val = val * 16 + (CUR - 'A') + 10;
else {
ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
@@ -1733,6 +1753,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
break;
}
NEXT;
+ count++;
}
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
@@ -1741,8 +1762,9 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
}
} else if ((RAW == '&') && (NXT(1) == '#')) {
SKIP(2);
- while (RAW != ';') {
- if ((RAW >= '0') && (RAW <= '9'))
+ GROW;
+ while (RAW != ';') { /* loop blocked by count */
+ if ((RAW >= '0') && (RAW <= '9') && (count < 20))
val = val * 10 + (CUR - '0');
else {
ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
@@ -1755,6 +1777,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) {
break;
}
NEXT;
+ count++;
}
if (RAW == ';') {
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
@@ -1818,7 +1841,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
ptr += 3;
cur = *ptr;
- while (cur != ';') {
+ while (cur != ';') { /* Non input consuming loop */
if ((cur >= '0') && (cur <= '9'))
val = val * 16 + (cur - '0');
else if ((cur >= 'a') && (cur <= 'f'))
@@ -1843,7 +1866,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
} else if ((cur == '&') && (ptr[1] == '#')){
ptr += 2;
cur = *ptr;
- while (cur != ';') {
+ while (cur != ';') { /* Non input consuming loops */
if ((cur >= '0') && (cur <= '9'))
val = val * 10 + (cur - '0');
else {
@@ -1912,6 +1935,8 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
* A PEReference may have been detectect in the current input stream
* the handling is done accordingly to
* http://www.w3.org/TR/REC-xml#entproc
+ *
+ * TODO: the default handling part seems deprecated now ... cut it off
*/
void
xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
@@ -1974,7 +1999,7 @@ xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
* substitution here since we need the literal
* entity value to be able to save the internal
* subset of the document.
- * This will be handled by xmlDecodeEntities
+ * This will be handled by xmlStringDecodeEntities
*/
return;
case XML_PARSER_CONTENT:
@@ -2026,7 +2051,7 @@ xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
* substitution here since we need the literal
* entity value to be able to save the internal
* subset of the document.
- * This will be handled by xmlDecodeEntities
+ * This will be handled by xmlStringDecodeEntities
*/
return;
case XML_PARSER_ATTRIBUTE_VALUE:
@@ -2036,7 +2061,7 @@ xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
* the parser is explicitely asked to substitute
* entities. The SAX callback is called with values
* without entity substitution.
- * This will then be handled by xmlDecodeEntities
+ * This will then be handled by xmlStringDecodeEntities
*/
return;
case XML_PARSER_ENTITY_DECL:
@@ -2055,6 +2080,10 @@ xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
return;
}
+/* TODO: this seems not reached anymore .... Verify ... */
+fprintf(stderr, "Reached deprecated section in xmlParserHandleReference()\n");
+fprintf(stderr, "Please forward the document to Daniel.Veillard@w3.org\n");
+fprintf(stderr, "indicating the version: %s, thanks !\n", xmlParserVersion);
NEXT;
name = xmlScanName(ctxt);
if (name == NULL) {
@@ -2214,7 +2243,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
* substitution here since we need the literal
* entity value to be able to save the internal
* subset of the document.
- * This will be handled by xmlDecodeEntities
+ * This will be handled by xmlStringDecodeEntities
*/
return;
case XML_PARSER_DTD:
@@ -2340,6 +2369,11 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
*
+ * This function is deprecated, we now always process entities content
+ * through xmlStringDecodeEntities
+ *
+ * TODO: remove it in next major release.
+ *
* [67] Reference ::= EntityRef | CharRef
*
* [69] PEReference ::= '%' Name ';'
@@ -2382,10 +2416,11 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
/*
* Ok loop until we reach one of the ending char or a size limit.
*/
+ GROW;
c = CUR_CHAR(l);
- while ((nbchars < max) && (c != end) &&
+ while ((nbchars < max) && (c != end) && /* NOTUSED */
(c != end2) && (c != end3)) {
-
+ GROW;
if (c == 0) break;
if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
int val = xmlParseCharRef(ctxt);
@@ -2399,7 +2434,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
if ((ent != NULL) &&
(ctxt->replaceEntities != 0)) {
current = ent->content;
- while (*current != 0) {
+ while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
growBuffer(buffer);
@@ -2412,7 +2447,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
growBuffer(buffer);
}
- while (*cur != 0) {
+ while (*cur != 0) { /* non input consuming loop */
buffer[nbchars++] = *cur++;
}
buffer[nbchars++] = ';';
@@ -2432,7 +2467,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
/*
* Pop-up of finished entities.
*/
- while ((RAW == 0) && (ctxt->inputNr > 1))
+ while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
xmlPopInput(ctxt);
break;
@@ -2458,6 +2493,8 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
* @end2: an end marker xmlChar, 0 if none
* @end3: an end marker xmlChar, 0 if none
*
+ * Takes an entity string content and processes it to do the adequate substitutions.
+ *
* [67] Reference ::= EntityRef | CharRef
*
* [69] PEReference ::= '%' Name ';'
@@ -2501,9 +2538,11 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
/*
* Ok loop until we reach one of the ending char or a size limit.
+ * we are operating on already parsed values.
*/
c = CUR_SCHAR(str, l);
- while ((c != 0) && (c != end) && (c != end2) && (c != end3)) {
+ while ((c != 0) && (c != end) && /* non input consuming loop */
+ (c != end2) && (c != end3)) {
if (c == 0) break;
if ((c == '&') && (str[1] == '#')) {
@@ -2516,7 +2555,8 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
fprintf(stderr, "String decoding Entity Reference: %.30s\n",
str);
ent = xmlParseStringEntityRef(ctxt, &str);
- if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
+ if ((ent != NULL) &&
+ (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
if (ent->content != NULL) {
COPY_BUF(0,buffer,nbchars,ent->content[0]);
} else {
@@ -2533,7 +2573,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
ctxt->depth--;
if (rep != NULL) {
current = rep;
- while (*current != 0) {
+ while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
if (nbchars >
buffer_size - XML_PARSER_BUFFER_SIZE) {
@@ -2567,7 +2607,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
ctxt->depth--;
if (rep != NULL) {
current = rep;
- while (*current != 0) {
+ while (*current != 0) { /* non input consuming loop */
buffer[nbchars++] = *current++;
if (nbchars >
buffer_size - XML_PARSER_BUFFER_SIZE) {
@@ -2603,6 +2643,9 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
*
* Checks that the value conforms to the LanguageID production:
*
+ * NOTE: this is somewhat deprecated, those productions were removed from
+ * the XML Second edition.
+ *
* [33] LanguageID ::= Langcode ('-' Subcode)*
* [34] Langcode ::= ISO639Code | IanaCode | UserCode
* [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
@@ -2624,7 +2667,7 @@ xmlCheckLanguageID(const xmlChar *lang) {
* IANA code
*/
cur += 2;
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
+ while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
((cur[0] >= 'a') && (cur[0] <= 'z')))
cur++;
} else if (((cur[0] == 'x') && (cur[1] == '-')) ||
@@ -2633,7 +2676,7 @@ xmlCheckLanguageID(const xmlChar *lang) {
* User code
*/
cur += 2;
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
+ while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
((cur[0] >= 'a') && (cur[0] <= 'z')))
cur++;
} else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
@@ -2649,7 +2692,7 @@ xmlCheckLanguageID(const xmlChar *lang) {
return(0);
} else
return(0);
- while (cur[0] != 0) {
+ while (cur[0] != 0) { /* non input consuming */
if (cur[0] != '-')
return(0);
cur++;
@@ -2658,7 +2701,7 @@ xmlCheckLanguageID(const xmlChar *lang) {
cur++;
else
return(0);
- while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
+ while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
((cur[0] >= 'a') && (cur[0] <= 'z')))
cur++;
}
@@ -3010,7 +3053,7 @@ xmlStrdup(const xmlChar *cur) {
const xmlChar *p = cur;
if (cur == NULL) return(NULL);
- while (*p != 0) p++;
+ while (*p != 0) p++; /* non input consuming */
return(xmlStrndup(cur, p - cur));
}
@@ -3057,7 +3100,7 @@ xmlCharStrdup(const char *cur) {
const char *p = cur;
if (cur == NULL) return(NULL);
- while (*p != '\0') p++;
+ while (*p != '\0') p++; /* non input consuming */
return(xmlCharStrndup(cur, p - cur));
}
@@ -3081,7 +3124,7 @@ xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
do {
tmp = *str1++ - *str2++;
if (tmp != 0) return(tmp);
- } while ((*str1 != 0) && (*str2 != 0));
+ } while ((*str1 != 0) && (*str2 != 0)); /* non input consuming */
return (*str1 - *str2);
}
@@ -3109,7 +3152,7 @@ xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
if (tmp != 0) return(tmp);
len--;
if (len <= 0) return(0);
- } while ((*str1 != 0) && (*str2 != 0));
+ } while ((*str1 != 0) && (*str2 != 0)); /* non input consuming */
return (*str1 - *str2);
}
@@ -3126,7 +3169,7 @@ xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
const xmlChar *
xmlStrchr(const xmlChar *str, xmlChar val) {
if (str == NULL) return(NULL);
- while (*str != 0) {
+ while (*str != 0) { /* non input consuming */
if (*str == val) return((xmlChar *) str);
str++;
}
@@ -3152,7 +3195,7 @@ xmlStrstr(const xmlChar *str, xmlChar *val) {
n = xmlStrlen(val);
if (n == 0) return(str);
- while (*str != 0) {
+ while (*str != 0) { /* non input consuming */
if (*str == *val) {
if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
}
@@ -3202,7 +3245,7 @@ xmlStrlen(const xmlChar *str) {
int len = 0;
if (str == NULL) return(0);
- while (*str != 0) {
+ while (*str != 0) { /* non input consuming */
str++;
len++;
}
@@ -3261,7 +3304,7 @@ xmlStrcat(xmlChar *cur, const xmlChar *add) {
if (cur == NULL)
return(xmlStrdup(add));
- while (*p != 0) p++;
+ while (*p != 0) p++; /* non input consuming */
return(xmlStrncat(cur, add, p - add));
}
@@ -3392,6 +3435,9 @@ void xmlParseReference(xmlParserCtxtPtr ctxt);
*
* parse an XML namespace name.
*
+ * TODO: this seems not in use anymore, the namespace handling is done on
+ * top of the SAX interfaces, i.e. not on raw input.
+ *
* [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
*
* [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
@@ -3407,9 +3453,11 @@ xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
int cur = CUR_CHAR(l);
/* load first the value of the char !!! */
+ GROW;
if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
- while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
+fprintf(stderr, "xmlNamespaceParseNCName: reached loop 3\n");
+ while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
(cur == '.') || (cur == '-') ||
(cur == '_') ||
(IS_COMBINING(cur)) ||
@@ -3420,7 +3468,7 @@ xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
if (len >= XML_MAX_NAMELEN) {
fprintf(stderr,
"xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
- while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
+ while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
(cur == '.') || (cur == '-') ||
(cur == '_') ||
(IS_COMBINING(cur)) ||
@@ -3439,6 +3487,9 @@ xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
* @ctxt: an XML parser context
* @prefix: a xmlChar **
*
+ * TODO: this seems not in use anymore, the namespace handling is done on
+ * top of the SAX interfaces, i.e. not on raw input.
+ *
* parse an XML qualified name
*
* [NS 5] QName ::= (Prefix ':')? LocalPart
@@ -3466,74 +3517,15 @@ xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
return(ret);
}
-/**
- * xmlSplitQName:
- * @ctxt: an XML parser context
- * @name: an XML parser context
- * @prefix: a xmlChar **
- *
- * parse an UTF8 encoded XML qualified name string
- *
- * [NS 5] QName ::= (Prefix ':')? LocalPart
- *
- * [NS 6] Prefix ::= NCName
- *
- * [NS 7] LocalPart ::= NCName
- *
- * Returns the local part, and prefix is updated
- * to get the Prefix if any.
- */
-
-xmlChar *
-xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
- xmlChar buf[XML_MAX_NAMELEN + 5];
- int len = 0;
- xmlChar *ret = NULL;
- const xmlChar *cur = name;
- int c;
-
- *prefix = NULL;
-
- /* xml: prefix is not really a namespace */
- if ((cur[0] == 'x') && (cur[1] == 'm') &&
- (cur[2] == 'l') && (cur[3] == ':'))
- return(xmlStrdup(name));
-
- /* nasty but valid */
- if (cur[0] == ':')
- return(xmlStrdup(name));
-
- c = *cur++;
- while ((c != 0) && (c != ':')) {
- buf[len++] = c;
- c = *cur++;
- }
-
- ret = xmlStrndup(buf, len);
-
- if (c == ':') {
- c = *cur++;
- if (c == 0) return(ret);
- *prefix = ret;
- len = 0;
-
- while (c != 0) {
- buf[len++] = c;
- c = *cur++;
- }
-
- ret = xmlStrndup(buf, len);
- }
-
- return(ret);
-}
-
/**
* xmlNamespaceParseNSDef:
* @ctxt: an XML parser context
*
* parse a namespace prefix declaration
*
+ * TODO: this seems not in use anymore, the namespace handling is done on
+ * top of the SAX interfaces, i.e. not on raw input.
+ *
* [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
*
* [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
@@ -3557,12 +3549,152 @@ xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
return(name);
}
+/**
+ * xmlSplitQName:
+ * @ctxt: an XML parser context
+ * @name: the UTF8 encoded XML qualified name string to split
+ * @prefix: a xmlChar **
+ *
+ * parse an UTF8 encoded XML qualified name string
+ *
+ * [NS 5] QName ::= (Prefix ':')? LocalPart
+ *
+ * [NS 6] Prefix ::= NCName
+ *
+ * [NS 7] LocalPart ::= NCName
+ *
+ * Returns the local part, and prefix is updated
+ * to get the Prefix if any.
+ */
+
+xmlChar *
+xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
+ xmlChar buf[XML_MAX_NAMELEN + 5];
+ xmlChar *buffer = NULL;
+ int len = 0;
+ int max = XML_MAX_NAMELEN;
+ xmlChar *ret = NULL;
+ const xmlChar *cur = name;
+ int c;
+
+ *prefix = NULL;
+
+ /* xml: prefix is not really a namespace */
+ if ((cur[0] == 'x') && (cur[1] == 'm') &&
+ (cur[2] == 'l') && (cur[3] == ':'))
+ return(xmlStrdup(name));
+
+ /* nasty but valid */
+ if (cur[0] == ':')
+ return(xmlStrdup(name));
+
+ c = *cur++;
+ while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
+ buf[len++] = c;
+ c = *cur++;
+ }
+ if (len >= max) {
+ /*
+ * Okay someone managed to make a huge name, so he's ready to pay
+ * for the processing speed.
+ */
+ max = len * 2;
+
+ buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlSplitQName: out of memory\n");
+ return(NULL);
+ }
+ memcpy(buffer, buf, len);
+ while ((c != 0) && (c != ':')) { /* tested bigname.xml */
+ if (len + 10 > max) {
+ max *= 2;
+ buffer = (xmlChar *) xmlRealloc(buffer,
+ max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlSplitQName: out of memory\n");
+ return(NULL);
+ }
+ }
+ buffer[len++] = c;
+ c = *cur++;
+ }
+ buffer[len] = 0;
+ }
+
+ if (buffer == NULL)
+ ret = xmlStrndup(buf, len);
+ else {
+ ret = buffer;
+ buffer = NULL;
+ max = XML_MAX_NAMELEN;
+ }
+
+
+ if (c == ':') {
+ c = *cur++;
+ if (c == 0) return(ret);
+ *prefix = ret;
+ len = 0;
+
+ while ((c != 0) && (len < max)) { /* tested bigname2.xml */
+ buf[len++] = c;
+ c = *cur++;
+ }
+ if (len >= max) {
+ /*
+ * Okay someone managed to make a huge name, so he's ready to pay
+ * for the processing speed.
+ */
+ max = len * 2;
+
+ buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlSplitQName: out of memory\n");
+ return(NULL);
+ }
+ memcpy(buffer, buf, len);
+ while (c != 0) { /* tested bigname2.xml */
+ if (len + 10 > max) {
+ max *= 2;
+ buffer = (xmlChar *) xmlRealloc(buffer,
+ max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlSplitQName: out of memory\n");
+ return(NULL);
+ }
+ }
+ buffer[len++] = c;
+ c = *cur++;
+ }
+ buffer[len] = 0;
+ }
+
+ if (buffer == NULL)
+ ret = xmlStrndup(buf, len);
+ else {
+ ret = buffer;
+ }
+ }
+
+ return(ret);
+}
+
/**
* xmlParseQuotedString:
* @ctxt: an XML parser context
*
- * [OLD] Parse and return a string between quotes or doublequotes
- * To be removed at next drop of binary compatibility
+ * Parse and return a string between quotes or doublequotes
+ *
+ * TODO: Deprecated, to be removed at next drop of binary compatibility
*
* Returns the string parser or NULL.
*/
@@ -3578,10 +3710,11 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
fprintf(stderr, "malloc of %d byte failed\n", size);
return(NULL);
}
+fprintf(stderr, "xmlParseQuotedString: reached loop 4\n");
if (RAW == '"') {
NEXT;
c = CUR_CHAR(l);
- while (IS_CHAR(c) && (c != '"')) {
+ while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
if (len + 5 >= size) {
size *= 2;
buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
@@ -3607,7 +3740,7 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
} else if (RAW == '\''){
NEXT;
c = CUR;
- while (IS_CHAR(c) && (c != '\'')) {
+ while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
if (len + 1 >= size) {
size *= 2;
buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
@@ -3638,12 +3771,14 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
* xmlParseNamespace:
* @ctxt: an XML parser context
*
- * [OLD] xmlParseNamespace: parse specific PI '')) {
+fprintf(stderr, "xmlParseNamespace: reached loop 5\n");
+ while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
/*
* We can have "ns" or "prefix" attributes
* Old encoding as 'href' or 'AS' attributes is still supported
@@ -3755,7 +3891,10 @@ xmlParseNamespace(xmlParserCtxtPtr ctxt) {
* @ctxt: an XML parser context
*
* Trickery: parse an XML name but without consuming the input flow
- * Needed for rollback cases.
+ * Needed for rollback cases. Used only when parsing entity references.
+ *
+ * TODO: seems deprecated now, only used in the default part of
+ * xmlParserHandleReference
*
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
* CombiningChar | Extender
@@ -3778,17 +3917,20 @@ xmlScanName(xmlParserCtxtPtr ctxt) {
return(NULL);
}
- while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
+
+ while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
(NXT(len) == '.') || (NXT(len) == '-') ||
(NXT(len) == '_') || (NXT(len) == ':') ||
(IS_COMBINING(NXT(len))) ||
(IS_EXTENDER(NXT(len)))) {
+ GROW;
buf[len] = NXT(len);
len++;
if (len >= XML_MAX_NAMELEN) {
fprintf(stderr,
"xmlScanName: reached XML_MAX_NAMELEN limit\n");
- while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
+ while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
+ (IS_DIGIT(NXT(len))) ||
(NXT(len) == '.') || (NXT(len) == '-') ||
(NXT(len) == '_') || (NXT(len) == ':') ||
(IS_COMBINING(NXT(len))) ||
@@ -3821,6 +3963,7 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
xmlChar buf[XML_MAX_NAMELEN + 5];
int len = 0, l;
int c;
+ int count = 0;
GROW;
c = CUR_CHAR(l);
@@ -3830,27 +3973,61 @@ xmlParseName(xmlParserCtxtPtr ctxt) {
return(NULL);
}
- while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
+ while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
(c == '.') || (c == '-') ||
(c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c)))) {
+ if (count++ > 100) {
+ count = 0;
+ GROW;
+ }
COPY_BUF(l,buf,len,c);
NEXTL(l);
c = CUR_CHAR(l);
if (len >= XML_MAX_NAMELEN) {
- fprintf(stderr,
- "xmlParseName: reached XML_MAX_NAMELEN limit\n");
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
+ /*
+ * Okay someone managed to make a huge name, so he's ready to pay
+ * for the processing speed.
+ */
+ xmlChar *buffer;
+ int max = len * 2;
+
+ buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlParseName: out of memory\n");
+ return(NULL);
+ }
+ memcpy(buffer, buf, len);
+ while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
(c == '.') || (c == '-') ||
(c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c))) {
+ if (count++ > 100) {
+ count = 0;
+ GROW;
+ }
+ if (len + 10 > max) {
+ max *= 2;
+ buffer = (xmlChar *) xmlRealloc(buffer,
+ max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlParseName: out of memory\n");
+ return(NULL);
+ }
+ }
+ COPY_BUF(l,buffer,len,c);
NEXTL(l);
c = CUR_CHAR(l);
}
- break;
+ buffer[len] = 0;
+ return(buffer);
}
}
return(xmlStrndup(buf, len));
@@ -3887,7 +4064,7 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
return(NULL);
}
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
+ while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
(c == '.') || (c == '-') ||
(c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
@@ -3895,18 +4072,45 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
COPY_BUF(l,buf,len,c);
cur += l;
c = CUR_SCHAR(cur, l);
- if (len >= XML_MAX_NAMELEN) {
- fprintf(stderr,
- "xmlParseName: reached XML_MAX_NAMELEN limit\n");
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
+ if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
+ /*
+ * Okay someone managed to make a huge name, so he's ready to pay
+ * for the processing speed.
+ */
+ xmlChar *buffer;
+ int max = len * 2;
+
+ buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlParseStringName: out of memory\n");
+ return(NULL);
+ }
+ memcpy(buffer, buf, len);
+ while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
(c == '.') || (c == '-') ||
(c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c))) {
+ if (len + 10 > max) {
+ max *= 2;
+ buffer = (xmlChar *) xmlRealloc(buffer,
+ max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlParseStringName: out of memory\n");
+ return(NULL);
+ }
+ }
+ COPY_BUF(l,buffer,len,c);
cur += l;
c = CUR_SCHAR(cur, l);
}
- break;
+ buffer[len] = 0;
+ *str = cur;
+ return(buffer);
}
}
*str = cur;
@@ -3928,32 +4132,68 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
- xmlChar buf[XML_MAX_NAMELEN];
- int len = 0;
- int c,l;
+ xmlChar buf[XML_MAX_NAMELEN + 5];
+ int len = 0, l;
+ int c;
+ int count = 0;
GROW;
c = CUR_CHAR(l);
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
+
+ while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
(c == '.') || (c == '-') ||
(c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c))) {
+ if (count++ > 100) {
+ count = 0;
+ GROW;
+ }
COPY_BUF(l,buf,len,c);
NEXTL(l);
c = CUR_CHAR(l);
if (len >= XML_MAX_NAMELEN) {
- fprintf(stderr,
- "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
- while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
+ /*
+ * Okay someone managed to make a huge token, so he's ready to pay
+ * for the processing speed.
+ */
+ xmlChar *buffer;
+ int max = len * 2;
+
+ buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlParseNmtoken: out of memory\n");
+ return(NULL);
+ }
+ memcpy(buffer, buf, len);
+ while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
(c == '.') || (c == '-') ||
(c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c))) {
+ if (count++ > 100) {
+ count = 0;
+ GROW;
+ }
+ if (len + 10 > max) {
+ max *= 2;
+ buffer = (xmlChar *) xmlRealloc(buffer,
+ max * sizeof(xmlChar));
+ if (buffer == NULL) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "xmlParseName: out of memory\n");
+ return(NULL);
+ }
+ }
+ COPY_BUF(l,buffer,len,c);
NEXTL(l);
c = CUR_CHAR(l);
}
- break;
+ buffer[len] = 0;
+ return(buffer);
}
}
if (len == 0)
@@ -4019,7 +4259,8 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
* In practice it means we stop the loop only when back at parsing
* the initial entity and the quote is found
*/
- while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
+ while ((IS_CHAR(c)) && ((c != stop) || /* checked */
+ (ctxt->input != input))) {
if (len + 5 >= size) {
size *= 2;
buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
@@ -4033,7 +4274,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
/*
* Pop-up of finished entities.
*/
- while ((RAW == 0) && (ctxt->inputNr > 1))
+ while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
xmlPopInput(ctxt);
GROW;
@@ -6837,7 +7078,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
(value[1] == 0) && (value[0] == '<') &&
(!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
/*
- * TODO: get definite answer on this !!!
+ * DONE: get definite answer on this !!!
* Lots of entity decls are used to declare a single
* char
*
@@ -6852,6 +7093,11 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
* tests, this is broken. However the XML REC uses
* it. Is the XML REC not well-formed ????
* This is a hack to avoid this problem
+ *
+ * ANSWER: since lt gt amp .. are already defined,
+ * this is a redefinition and hence the fact that the
+ * content is not well balanced is not a Wf error, this
+ * is lousy but acceptable.
*/
list = xmlNewDocText(ctxt->myDoc, value);
if (list != NULL) {
@@ -6931,46 +7177,64 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
ctxt->sax->reference(ctxt->userData, ent->name);
return;
} else if (ctxt->replaceEntities) {
- xmlParserInputPtr input;
+ if ((ctxt->node != NULL) && (ent->children != NULL)) {
+ /*
+ * Seems we are generating the DOM content, do
+ * a simple tree copy
+ */
+ xmlNodePtr new;
+ new = xmlCopyNodeList(ent->children);
+
+ xmlAddChildList(ctxt->node, new);
+ /*
+ * This is to avoid a nasty side effect, see
+ * characters() in SAX.c
+ */
+ ctxt->nodemem = 0;
+ ctxt->nodelen = 0;
+ return;
+ } else {
+ /*
+ * Probably running in SAX mode
+ */
+ xmlParserInputPtr input;
- input = xmlNewEntityInputStream(ctxt, ent);
- xmlPushInput(ctxt, input);
- if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
- (RAW == '<') && (NXT(1) == '?') &&
- (NXT(2) == 'x') && (NXT(3) == 'm') &&
- (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
- xmlParseTextDecl(ctxt);
- if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
- /*
- * The XML REC instructs us to stop parsing right here
- */
- ctxt->instate = XML_PARSER_EOF;
- return;
- }
- if (input->standalone == 1) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "external parsed entities cannot be standalone\n");
- ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
+ input = xmlNewEntityInputStream(ctxt, ent);
+ xmlPushInput(ctxt, input);
+ if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
+ (RAW == '<') && (NXT(1) == '?') &&
+ (NXT(2) == 'x') && (NXT(3) == 'm') &&
+ (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
+ xmlParseTextDecl(ctxt);
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+ /*
+ * The XML REC instructs us to stop parsing right here
+ */
+ ctxt->instate = XML_PARSER_EOF;
+ return;
+ }
+ if (input->standalone == 1) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "external parsed entities cannot be standalone\n");
+ ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
+ ctxt->wellFormed = 0;
+ ctxt->disableSAX = 1;
+ }
}
+ return;
}
- /*
- * !!! TODO: build the tree under the entity first
- * 1234
- */
- return;
}
+ } else {
+ val = ent->content;
+ if (val == NULL) return;
+ /*
+ * inline the entity.
+ */
+ if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
+ (!ctxt->disableSAX))
+ ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
}
- val = ent->content;
- if (val == NULL) return;
- /*
- * inline the entity.
- */
- if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
}
}
@@ -7820,15 +8084,14 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
/*
* Check that xml:lang conforms to the specification
+ * This is no longer reported as an error; only a warning is generated
+ * now, since this was deprecated in XML second edition
*/
if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
if (!xmlCheckLanguageID(val)) {
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Invalid value for xml:lang : %s\n", val);
- ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
- ctxt->wellFormed = 0;
- ctxt->disableSAX = 1;
+ if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
+ ctxt->sax->warning(ctxt->userData,
+ "Malformed value for xml:lang : %s\n", val);
}
}
diff --git a/parserInternals.h b/parserInternals.h
index 8fd6ffc6..7956dd3c 100644
--- a/parserInternals.h
+++ b/parserInternals.h
@@ -15,7 +15,7 @@
extern "C" {
#endif
-#define XML_MAX_NAMELEN 1000
+#define XML_MAX_NAMELEN 100
/************************************************************************
* *
diff --git a/result/bigentname.xml b/result/bigentname.xml
new file mode 100644
index 00000000..6b7183f0
--- /dev/null
+++ b/result/bigentname.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+&WhatHeSaid;
diff --git a/result/bigname.xml b/result/bigname.xml
new file mode 100644
index 00000000..885fd7c5
--- /dev/null
+++ b/result/bigname.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/result/bigname2.xml b/result/bigname2.xml
new file mode 100644
index 00000000..a48c3598
--- /dev/null
+++ b/result/bigname2.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/result/noent/bigentname.xml b/result/noent/bigentname.xml
new file mode 100644
index 00000000..f19c697e
--- /dev/null
+++ b/result/noent/bigentname.xml
@@ -0,0 +1,6 @@
+
+
+
+]>
+He said "Yes"
diff --git a/result/noent/bigname.xml b/result/noent/bigname.xml
new file mode 100644
index 00000000..885fd7c5
--- /dev/null
+++ b/result/noent/bigname.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/result/noent/bigname2.xml b/result/noent/bigname2.xml
new file mode 100644
index 00000000..a48c3598
--- /dev/null
+++ b/result/noent/bigname2.xml
@@ -0,0 +1,2 @@
+
+
diff --git a/result/noent/dtd12 b/result/noent/dtd12
index 8c4bf36d..5639acc5 100644
--- a/result/noent/dtd12
+++ b/result/noent/dtd12
@@ -3,4 +3,4 @@
]>
-He said &YN;
+He said "Yes"
diff --git a/result/noent/tstblanks.xml b/result/noent/tstblanks.xml
new file mode 100644
index 00000000..25618591
--- /dev/null
+++ b/result/noent/tstblanks.xml
@@ -0,0 +1,2 @@
+
+content
diff --git a/result/noent/xml1 b/result/noent/xml1
index a197468d..951830ad 100644
--- a/result/noent/xml1
+++ b/result/noent/xml1
@@ -7,5 +7,5 @@
An ampersand (&) may be escaped
numerically (&) or with a general entity
- (&amp;).
+ (&).
diff --git a/result/tstblanks.xml b/result/tstblanks.xml
new file mode 100644
index 00000000..25618591
--- /dev/null
+++ b/result/tstblanks.xml
@@ -0,0 +1,2 @@
+
+content
diff --git a/test/bigentname.xml b/test/bigentname.xml
new file mode 100644
index 00000000..aa6e3364
--- /dev/null
+++ b/test/bigentname.xml
@@ -0,0 +1,5 @@
+
+
+]>
+&WhatHeSaid;
diff --git a/test/bigname.xml b/test/bigname.xml
new file mode 100644
index 00000000..6c303e47
--- /dev/null
+++ b/test/bigname.xml
@@ -0,0 +1 @@
+
diff --git a/test/bigname2.xml b/test/bigname2.xml
new file mode 100644
index 00000000..c67cda91
--- /dev/null
+++ b/test/bigname2.xml
@@ -0,0 +1 @@
+
diff --git a/test/tstblanks.xml b/test/tstblanks.xml
new file mode 100644
index 00000000..7c5a23d5
--- /dev/null
+++ b/test/tstblanks.xml
@@ -0,0 +1,495 @@
+
+content
diff --git a/test/warning/ent9 b/test/warning/ent9
new file mode 100644
index 00000000..009e322e
--- /dev/null
+++ b/test/warning/ent9
@@ -0,0 +1,7 @@
+
+prefix is indeclared here">
+]>
+
+ &xml;
+
diff --git a/tree.c b/tree.c
index b5e116ca..21c86635 100644
--- a/tree.c
+++ b/tree.c
@@ -129,19 +129,14 @@ xmlUpgradeOldNs(xmlDocPtr doc) {
* Creation of a new Namespace. This function will refuse to create
* a namespace with a similar prefix than an existing one present on this
* node.
+ * We use href==NULL in the case of an element creation where the namespace
+ * was not defined.
* Returns returns a new namespace pointer or NULL
*/
xmlNsPtr
xmlNewNs(xmlNodePtr node, const xmlChar *href, const xmlChar *prefix) {
xmlNsPtr cur;
- if (href == NULL) {
-#ifdef DEBUG_TREE
- fprintf(stderr, "xmlNewNs: href == NULL !\n");
-#endif
- return(NULL);
- }
-
/*
* Allocate a new Namespace and fill the fields.
*/
@@ -1244,9 +1239,8 @@ xmlNewPI(const xmlChar *name, const xmlChar *content) {
* @ns: namespace if any
* @name: the node name
*
- * Creation of a new node element. @ns and @content are optionnal (NULL).
- * If content is non NULL, a child list containing the TEXTs and
- * ENTITY_REFs node will be created.
+ * Creation of a new node element. @ns is optional (NULL).
+ *
* Returns a pointer to the new node object.
*/
xmlNodePtr
@@ -3217,6 +3211,10 @@ xmlGetNsList(xmlDocPtr doc, xmlNodePtr node) {
* recurse on the parents until it finds the defined namespace
* or return NULL otherwise.
* @nameSpace can be NULL, this is a search for the default namespace.
+ * The search does not cross entity boundaries. If you don't declare
+ * the namespace within the entity you will be in trouble !!! A warning
+ * is generated to cover this case.
+ *
* Returns the namespace pointer or NULL.
*/
xmlNsPtr
@@ -3225,12 +3223,18 @@ xmlSearchNs(xmlDocPtr doc, xmlNodePtr node, const xmlChar *nameSpace) {
if (node == NULL) return(NULL);
while (node != NULL) {
+ if ((node->type == XML_ENTITY_REF_NODE) ||
+ (node->type == XML_ENTITY_NODE) ||
+ (node->type == XML_ENTITY_DECL))
+ return(NULL);
if (node->type == XML_ELEMENT_NODE) {
cur = node->nsDef;
while (cur != NULL) {
- if ((cur->prefix == NULL) && (nameSpace == NULL))
+ if ((cur->prefix == NULL) && (nameSpace == NULL) &&
+ (cur->href != NULL))
return(cur);
if ((cur->prefix != NULL) && (nameSpace != NULL) &&
+ (cur->href != NULL) &&
(!xmlStrcmp(cur->prefix, nameSpace)))
return(cur);
cur = cur->next;
@@ -4840,7 +4844,7 @@ xmlNsDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) {
#endif
return;
}
- if (cur->type == XML_LOCAL_NAMESPACE) {
+ if ((cur->type == XML_LOCAL_NAMESPACE) && (cur->href != NULL)) {
/* Within the context of an element attributes */
if (cur->prefix != NULL) {
xmlOutputBufferWriteString(buf, " xmlns:");