From 34e3f641918086047dccc0992b639967e1ad9091 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Tue, 29 Jul 2008 09:02:27 +0000 Subject: [PATCH] implement XML-1.0 5th edition, add parser option XML_PARSE_OLD10 to stick * include/libxml/parser.h include/libxml/xmlerror.h parser.c: implement XML-1.0 5th edition, add parser option XML_PARSE_OLD10 to stick to old behaviour * testapi.c gentest.py: modified slightly and regenerated * Makefile.am: add testchar Daniel svn path=/trunk/; revision=3755 --- ChangeLog | 8 + Makefile.am | 7 +- gentest.py | 3 +- include/libxml/parser.h | 3 +- include/libxml/xmlerror.h | 1 + parser.c | 493 +++++++++++++++++++++++++------------- python/setup.py | 74 +++--- testapi.c | 21 -- 8 files changed, 387 insertions(+), 223 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6284f723..c9935a23 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Tue Jul 29 10:59:36 CEST 2008 Daniel Veillard + + * include/libxml/parser.h include/libxml/xmlerror.h parser.c: + implement XML-1.0 5th edition, add parser option XML_PARSE_OLD10 + to stick to old behaviour + * testapi.c gentest.py: modified slightly and regenerated + * Makefile.am: add testchar + Thu Jul 24 16:57:20 CEST 2008 Daniel Veillard * Makefile.am testchar.c Makefile.tests README.tests: add a diff --git a/Makefile.am b/Makefile.am index 4381069f..d26efccb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -8,7 +8,7 @@ INCLUDES = -I$(top_builddir)/include -I@srcdir@/include @THREAD_CFLAGS@ @Z_CFLAG noinst_PROGRAMS=testSchemas testRelax testSAX testHTML testXPath testURI \ testThreads testC14N testAutomata testRegexp \ - testReader testapi testModule runtest runsuite + testReader testapi testModule runtest runsuite testchar bin_PROGRAMS = xmllint xmlcatalog @@ -55,6 +55,11 @@ runtest_LDFLAGS = runtest_DEPENDENCIES = $(DEPS) runtest_LDADD= @BASE_THREAD_LIBS@ @RDL_LIBS@ $(LDADDS) +testchar_SOURCES=testchar.c +testchar_LDFLAGS = +testchar_DEPENDENCIES = $(DEPS) +testchar_LDADD= @RDL_LIBS@ 
$(LDADDS) + runsuite_SOURCES=runsuite.c runsuite_LDFLAGS = runsuite_DEPENDENCIES = $(DEPS) diff --git a/gentest.py b/gentest.py index 4538eb86..13792463 100755 --- a/gentest.py +++ b/gentest.py @@ -174,7 +174,8 @@ skipped_memcheck = [ "xmlLoadCatalog", "xmlAddEncodingAlias", "xmlInitCharEncodingHandlers", "xmlCatalogCleanup", "xmlSchemaGetBuiltInType", "htmlParseFile", "htmlCtxtReadFile", # loads the catalogs - "xmlTextReaderSchemaValidate", "xmlSchemaCleanupTypes" # initialize the schemas type system + "xmlTextReaderSchemaValidate", "xmlSchemaCleanupTypes", # initialize the schemas type system + "xmlCatalogResolve", "xmlIOParseDTD" # loads the catalogs ] # diff --git a/include/libxml/parser.h b/include/libxml/parser.h index fe63bda5..0e7f8fff 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -1089,9 +1089,10 @@ typedef enum { XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ - XML_PARSE_COMPACT = 1<<16 /* compact small text nodes; no modification of + XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree) */ + XML_PARSE_OLD10 = 1<<17 /* parse using XML-1.0 before update 5 */ } xmlParserOption; XMLPUBFUN void XMLCALL diff --git a/include/libxml/xmlerror.h b/include/libxml/xmlerror.h index 8cbab5ec..56262045 100644 --- a/include/libxml/xmlerror.h +++ b/include/libxml/xmlerror.h @@ -203,6 +203,7 @@ typedef enum { XML_ERR_NOTATION_PROCESSING, /* 105 */ XML_WAR_NS_COLUMN, /* 106 */ XML_WAR_ENTITY_REDEFINED, /* 107 */ + XML_ERR_UNKNOWN_VERSION, /* 108 */ XML_NS_ERR_XML_NAMESPACE = 200, XML_NS_ERR_UNDEFINED_NAMESPACE, /* 201 */ XML_NS_ERR_QNAME, /* 202 */ diff --git a/parser.c b/parser.c index f20fd050..0fa7a654 100644 --- a/parser.c +++ b/parser.c @@ -418,7 +418,7 @@ xmlWarningMsg(xmlParserCtxtPtr 
ctxt, xmlParserErrors error, const char *msg, const xmlChar *str1, const xmlChar *str2) { xmlStructuredErrorFunc schannel = NULL; - + if ((ctxt != NULL) && (ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF)) return; @@ -809,7 +809,7 @@ xmlHasFeature(xmlFeature feature) return(1); #else return(0); -#endif +#endif case XML_WITH_ZLIB: #ifdef LIBXML_ZLIB_ENABLED return(1); @@ -1293,7 +1293,7 @@ nsPop(xmlParserCtxtPtr ctxt, int nr) } if (ctxt->nsNr <= 0) return (0); - + for (i = 0;i < nr;i++) { ctxt->nsNr--; ctxt->nsTab[ctxt->nsNr] = NULL; @@ -2758,10 +2758,198 @@ xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { * * ************************************************************************/ -static const xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt); +/************************************************************************ + * * + * Routines to parse Name, NCName and NmToken * + * * + ************************************************************************/ +unsigned long nbParseName = 0; +unsigned long nbParseNmToken = 0; +unsigned long nbParseNCName = 0; +unsigned long nbParseNCNameComplex = 0; +unsigned long nbParseNameComplex = 0; +unsigned long nbParseStringName = 0; +/* + * The two following functions are related to the change of accepted + * characters for Name and NmToken in the Revision 5 of XML-1.0 + * They correspond to the modified production [4] and the new production [4a] + * changes in that revision. Also note that the macros used for the + * productions Letter, Digit, CombiningChar and Extender are not needed + * anymore. + * We still keep compatibility to pre-revision5 parsing semantic if the + * new XML_PARSE_OLD10 option is given to the parser. 
+ */ +static int +xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] and [5] of the + * Update 5 of XML-1.0 + */ + if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)))) + return(1); + } else { + if (IS_LETTER(c) || (c == '_') || (c == ':')) + return(1); + } + return(0); +} + +static int +xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] and [5] of the + * Update 5 of XML-1.0 + */ + if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)))) + return(1); + } else { + if ((IS_LETTER(c)) || (IS_DIGIT(c)) || 
+ (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c))) + return(1); + } + return(0); +} + static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc, int normalize); +static const xmlChar * +xmlParseNameComplex(xmlParserCtxtPtr ctxt) { + int len = 0, l; + int c; + int count = 0; + + nbParseNameComplex++; + + /* + * Handler for more complex cases + */ + GROW; + c = CUR_CHAR(l); + if ((ctxt->options & XML_PARSE_OLD10) == 0) { + /* + * Use the new checks of production [4] [4a] and [5] of the + * Update 5 of XML-1.0 + */ + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!(((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + (c == '_') || (c == ':') || + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF))))) { + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + (((c >= 'a') && (c <= 'z')) || + ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9')) || /* !start */ + (c == '_') || (c == ':') || + (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ + ((c >= 0xC0) && (c <= 0xD6)) || + ((c >= 0xD8) && (c <= 0xF6)) || + ((c >= 0xF8) && (c <= 0x2FF)) || + ((c >= 0x300) && (c <= 0x36F)) || /* !start */ + ((c >= 0x370) && (c <= 0x37D)) || + ((c >= 0x37F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 
0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) && (c <= 0xEFFFF)) + )) { + if (count++ > 100) { + count = 0; + GROW; + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + } else { + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!IS_LETTER(c) && (c != '_') && + (c != ':'))) { + return(NULL); + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + ((IS_LETTER(c)) || (IS_DIGIT(c)) || + (c == '.') || (c == '-') || + (c == '_') || (c == ':') || + (IS_COMBINING(c)) || + (IS_EXTENDER(c)))) { + if (count++ > 100) { + count = 0; + GROW; + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + } + if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); +} + /** * xmlParseName: * @ctxt: an XML parser context @@ -2786,6 +2974,8 @@ xmlParseName(xmlParserCtxtPtr ctxt) { GROW; + nbParseName++; + /* * Accelerator for simple ASCII names */ @@ -2811,9 +3001,93 @@ xmlParseName(xmlParserCtxtPtr ctxt) { return(ret); } } + /* accelerator for special cases */ return(xmlParseNameComplex(ctxt)); } +static const xmlChar * +xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { + int len = 0, l; + int c; + int count = 0; + + nbParseNCNameComplex++; + + /* + * Handler for more complex cases + */ + GROW; + c = CUR_CHAR(l); + if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ + (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { + return(NULL); + } + + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ + (xmlIsNameChar(ctxt, c) && (c != ':'))) { + if (count++ > 100) { + count = 0; + GROW; + } + len += l; + NEXTL(l); + c = CUR_CHAR(l); + } + return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); +} + +/** + * xmlParseNCName: + * @ctxt: an XML parser context + * @len: length of the string parsed + * + * parse an 
XML name. + * + * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | + * CombiningChar | Extender + * + * [5NS] NCName ::= (Letter | '_') (NCNameChar)* + * + * Returns the Name parsed or NULL + */ + +static const xmlChar * +xmlParseNCName(xmlParserCtxtPtr ctxt) { + const xmlChar *in; + const xmlChar *ret; + int count = 0; + + nbParseNCName++; + + /* + * Accelerator for simple ASCII names + */ + in = ctxt->input->cur; + if (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + (*in == '_')) { + in++; + while (((*in >= 0x61) && (*in <= 0x7A)) || + ((*in >= 0x41) && (*in <= 0x5A)) || + ((*in >= 0x30) && (*in <= 0x39)) || + (*in == '_') || (*in == '-') || + (*in == '.')) + in++; + if ((*in > 0) && (*in < 0x80)) { + count = in - ctxt->input->cur; + ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); + ctxt->input->cur = in; + ctxt->nbChars += count; + ctxt->input->col += count; + if (ret == NULL) { + xmlErrMemory(ctxt, NULL); + } + return(ret); + } + } + return(xmlParseNCNameComplex(ctxt)); +} + /** * xmlParseNameAndCompare: * @ctxt: an XML parser context @@ -2832,15 +3106,15 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { const xmlChar *ret; GROW; - + in = ctxt->input->cur; while (*in != 0 && *in == *cmp) { - ++in; + ++in; ++cmp; ctxt->input->col++; } if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { - /* success */ + /* success */ ctxt->input->cur = in; return (const xmlChar*) 1; } @@ -2853,42 +3127,6 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { return ret; } -static const xmlChar * -xmlParseNameComplex(xmlParserCtxtPtr ctxt) { - int len = 0, l; - int c; - int count = 0; - - /* - * Handler for more complex cases - */ - GROW; - c = CUR_CHAR(l); - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!IS_LETTER(c) && (c != '_') && - (c != ':'))) { - return(NULL); - } - - while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ - ((IS_LETTER(c)) || 
(IS_DIGIT(c)) || - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c)))) { - if (count++ > 100) { - count = 0; - GROW; - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - } - if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); -} - /** * xmlParseStringName: * @ctxt: an XML parser context @@ -2914,17 +3152,17 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { int len = 0, l; int c; + nbParseStringName++; + c = CUR_SCHAR(cur, l); - if (!IS_LETTER(c) && (c != '_') && - (c != ':')) { + if (!xmlIsNameStartChar(ctxt, c)) { return(NULL); } - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + COPY_BUF(l,buf,len,c); + cur += l; + c = CUR_SCHAR(cur, l); + while (xmlIsNameChar(ctxt, c)) { COPY_BUF(l,buf,len,c); cur += l; c = CUR_SCHAR(cur, l); @@ -2935,19 +3173,14 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { */ xmlChar *buffer; int max = len * 2; - + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); return(NULL); } memcpy(buffer, buf, len); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || - /* test bigentname.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + while (xmlIsNameChar(ctxt, c)) { if (len + 10 > max) { xmlChar *tmp; max *= 2; @@ -2976,7 +3209,7 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { /** * xmlParseNmtoken: * @ctxt: an XML parser context - * + * * parse an XML Nmtoken. 
* * [7] Nmtoken ::= (NameChar)+ @@ -2993,14 +3226,12 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { int c; int count = 0; + nbParseNmToken++; + GROW; c = CUR_CHAR(l); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + while (xmlIsNameChar(ctxt, c)) { if (count++ > 100) { count = 0; GROW; @@ -3015,18 +3246,14 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) { */ xmlChar *buffer; int max = len * 2; - + buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar)); if (buffer == NULL) { xmlErrMemory(ctxt, NULL); return(NULL); } memcpy(buffer, buf, len); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ - (c == '.') || (c == '-') || - (c == '_') || (c == ':') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c))) { + while (xmlIsNameChar(ctxt, c)) { if (count++ > 100) { count = 0; GROW; @@ -7652,38 +7879,6 @@ xmlParseEndTag(xmlParserCtxtPtr ctxt) { * * ************************************************************************/ -static const xmlChar * -xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { - int len = 0, l; - int c; - int count = 0; - - /* - * Handler for more complex cases - */ - GROW; - c = CUR_CHAR(l); - if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ - (!IS_LETTER(c) && (c != '_'))) { - return(NULL); - } - - while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ - ((IS_LETTER(c)) || (IS_DIGIT(c)) || - (c == '.') || (c == '-') || (c == '_') || - (IS_COMBINING(c)) || - (IS_EXTENDER(c)))) { - if (count++ > 100) { - count = 0; - GROW; - } - len += l; - NEXTL(l); - c = CUR_CHAR(l); - } - return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); -} - /* * xmlGetNamespace: * @ctxt: an XML parser context @@ -7708,56 +7903,6 @@ xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { return(NULL); } -/** - * xmlParseNCName: - * @ctxt: an XML parser context - * @len: lenght of the string parsed - * - * 
parse an XML name. - * - * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | - * CombiningChar | Extender - * - * [5NS] NCName ::= (Letter | '_') (NCNameChar)* - * - * Returns the Name parsed or NULL - */ - -static const xmlChar * -xmlParseNCName(xmlParserCtxtPtr ctxt) { - const xmlChar *in; - const xmlChar *ret; - int count = 0; - - /* - * Accelerator for simple ASCII names - */ - in = ctxt->input->cur; - if (((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - (*in == '_')) { - in++; - while (((*in >= 0x61) && (*in <= 0x7A)) || - ((*in >= 0x41) && (*in <= 0x5A)) || - ((*in >= 0x30) && (*in <= 0x39)) || - (*in == '_') || (*in == '-') || - (*in == '.')) - in++; - if ((*in > 0) && (*in < 0x80)) { - count = in - ctxt->input->cur; - ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); - ctxt->input->cur = in; - ctxt->nbChars += count; - ctxt->input->col += count; - if (ret == NULL) { - xmlErrMemory(ctxt, NULL); - } - return(ret); - } - } - return(xmlParseNCNameComplex(ctxt)); -} - /** * xmlParseQName: * @ctxt: an XML parser context @@ -9003,7 +9148,9 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { * * parse the XML version value. * - * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+ + * [26] VersionNum ::= '1.' 
[0-9]+ + * + * In practice allow [0-9].[0-9]+ at that level * * Returns the string giving the XML version number, or NULL */ @@ -9020,11 +9167,21 @@ xmlParseVersionNum(xmlParserCtxtPtr ctxt) { return(NULL); } cur = CUR; - while (((cur >= 'a') && (cur <= 'z')) || - ((cur >= 'A') && (cur <= 'Z')) || - ((cur >= '0') && (cur <= '9')) || - (cur == '_') || (cur == '.') || - (cur == ':') || (cur == '-')) { + if (!((cur >= '0') && (cur <= '9'))) { + free(buf); + return(NULL); + } + buf[len++] = cur; + NEXT; + cur=CUR; + if (cur != '.') { + free(buf); + return(NULL); + } + buf[len++] = cur; + NEXT; + cur=CUR; + while ((cur >= '0') && (cur <= '9')) { if (len + 1 >= size) { xmlChar *tmp; @@ -9117,7 +9274,7 @@ xmlParseEncName(xmlParserCtxtPtr ctxt) { xmlErrMemory(ctxt, NULL); return(NULL); } - + buf[len++] = cur; NEXT; cur = CUR; @@ -9363,11 +9520,23 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { } else { if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) { /* - * TODO: Blueberry should be detected here + * Changed here for XML-1.0 5th edition */ - xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, - "Unsupported version '%s'\n", - version, NULL); + if (ctxt->options & XML_PARSE_OLD10) { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version); + } else { + if ((version[0] == '1') && ((version[1] == '.'))) { + xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version, NULL); + } else { + xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION, + "Unsupported version '%s'\n", + version); + } + } } if (ctxt->version != NULL) xmlFree((void *) ctxt->version); diff --git a/python/setup.py b/python/setup.py index 8ab8c7f4..b54da493 100755 --- a/python/setup.py +++ b/python/setup.py @@ -61,13 +61,13 @@ includes_dir = [ "/opt/include", os.path.join(ROOT,'include'), HOME -] +]; xml_includes="" for dir in includes_dir: if not missing(dir + "/libxml2/libxml/tree.h"): xml_includes=dir + "/libxml2" - break + break; if 
xml_includes == "": print "failed to find headers for libxml2: update includes_dir" @@ -77,7 +77,7 @@ iconv_includes="" for dir in includes_dir: if not missing(dir + "/iconv.h"): iconv_includes=dir - break + break; if iconv_includes == "": print "failed to find headers for libiconv: update includes_dir" @@ -90,22 +90,22 @@ os.path.join(ROOT,'lib'), xml_files = ["libxml2-api.xml", "libxml2-python-api.xml", "libxml.c", "libxml.py", "libxml_wrap.h", "types.c", - "xmlgenerator.py", "README", "TODO", "drv_libxml2.py"] + "xmlgenerator.py", "README", "TODO", "drv_libxml2.py"] xslt_files = ["libxslt-api.xml", "libxslt-python-api.xml", "libxslt.c", "libxsl.py", "libxslt_wrap.h", - "xsltgenerator.py"] + "xsltgenerator.py"] if missing("libxml2-py.c") or missing("libxml2.py"): try: - try: - import xmlgenerator - except: - import generator + try: + import xmlgenerator + except: + import generator except: - print "failed to find and generate stubs for libxml2, aborting ..." - print sys.exc_type, sys.exc_value - sys.exit(1) + print "failed to find and generate stubs for libxml2, aborting ..." + print sys.exc_type, sys.exc_value + sys.exit(1) head = open("libxml.py", "r") generated = open("libxml2class.py", "r") @@ -116,7 +116,7 @@ if missing("libxml2-py.c") or missing("libxml2.py"): else: result.write(line) for line in generated.readlines(): - result.write(line) + result.write(line) head.close() generated.close() result.close() @@ -126,39 +126,39 @@ if missing("libxslt-py.c") or missing("libxslt.py"): if missing("xsltgenerator.py") or missing("libxslt-api.xml"): print "libxslt stub generator not found, libxslt not built" else: - try: - import xsltgenerator - except: - print "failed to generate stubs for libxslt, aborting ..." 
- print sys.exc_type, sys.exc_value - else: - head = open("libxsl.py", "r") - generated = open("libxsltclass.py", "r") - result = open("libxslt.py", "w") - for line in head.readlines(): + try: + import xsltgenerator + except: + print "failed to generate stubs for libxslt, aborting ..." + print sys.exc_type, sys.exc_value + else: + head = open("libxsl.py", "r") + generated = open("libxsltclass.py", "r") + result = open("libxslt.py", "w") + for line in head.readlines(): if WITHDLLS: result.write(altImport(line)) else: result.write(line) - for line in generated.readlines(): - result.write(line) - head.close() - generated.close() - result.close() - with_xslt=1 + for line in generated.readlines(): + result.write(line) + head.close() + generated.close() + result.close() + with_xslt=1 else: with_xslt=1 if with_xslt == 1: xslt_includes="" for dir in includes_dir: - if not missing(dir + "/libxslt/xsltconfig.h"): - xslt_includes=dir + "/libxslt" - break + if not missing(dir + "/libxslt/xsltconfig.h"): + xslt_includes=dir + "/libxslt" + break; if xslt_includes == "": - print "failed to find headers for libxslt: update includes_dir" - with_xslt = 0 + print "failed to find headers for libxslt: update includes_dir" + with_xslt = 0 descr = "libxml2 package" @@ -198,7 +198,7 @@ extens=[Extension('libxml2mod', c_files, include_dirs=includes, libraries=libs, define_macros=macros)] if with_xslt == 1: extens.append(Extension('libxsltmod', xslt_c_files, include_dirs=includes, - library_dirs=libdirs, + library_dirs=libdirs, libraries=libs, define_macros=macros)) if missing("MANIFEST"): @@ -208,8 +208,8 @@ if missing("MANIFEST"): for file in xml_files: manifest.write(file + "\n") if with_xslt == 1: - for file in xslt_files: - manifest.write(file + "\n") + for file in xslt_files: + manifest.write(file + "\n") manifest.close() if WITHDLLS: diff --git a/testapi.c b/testapi.c index 44a3cd02..402187f4 100644 --- a/testapi.c +++ b/testapi.c @@ -6181,7 +6181,6 @@ test_xmlCatalogResolve(void) { 
int test_ret = 0; #if defined(LIBXML_CATALOG_ENABLED) - int mem_base; xmlChar * ret_val; xmlChar * pubID; /* the public ID string */ int n_pubID; @@ -6190,7 +6189,6 @@ test_xmlCatalogResolve(void) { for (n_pubID = 0;n_pubID < gen_nb_const_xmlChar_ptr;n_pubID++) { for (n_sysID = 0;n_sysID < gen_nb_const_xmlChar_ptr;n_sysID++) { - mem_base = xmlMemBlocks(); pubID = gen_const_xmlChar_ptr(n_pubID, 0); sysID = gen_const_xmlChar_ptr(n_sysID, 1); @@ -6200,14 +6198,6 @@ test_xmlCatalogResolve(void) { des_const_xmlChar_ptr(n_pubID, (const xmlChar *)pubID, 0); des_const_xmlChar_ptr(n_sysID, (const xmlChar *)sysID, 1); xmlResetLastError(); - if (mem_base != xmlMemBlocks()) { - printf("Leak of %d blocks found in xmlCatalogResolve", - xmlMemBlocks() - mem_base); - test_ret++; - printf(" %d", n_pubID); - printf(" %d", n_sysID); - printf("\n"); - } } } function_tests++; @@ -13153,7 +13143,6 @@ test_xmlIOParseDTD(void) { #if defined(LIBXML_VALID_ENABLED) #ifdef LIBXML_VALID_ENABLED - int mem_base; xmlDtdPtr ret_val; xmlSAXHandlerPtr sax; /* the SAX handler block or NULL */ int n_sax; @@ -13165,7 +13154,6 @@ test_xmlIOParseDTD(void) { for (n_sax = 0;n_sax < gen_nb_xmlSAXHandlerPtr;n_sax++) { for (n_input = 0;n_input < gen_nb_xmlParserInputBufferPtr;n_input++) { for (n_enc = 0;n_enc < gen_nb_xmlCharEncoding;n_enc++) { - mem_base = xmlMemBlocks(); sax = gen_xmlSAXHandlerPtr(n_sax, 0); input = gen_xmlParserInputBufferPtr(n_input, 1); enc = gen_xmlCharEncoding(n_enc, 2); @@ -13178,15 +13166,6 @@ test_xmlIOParseDTD(void) { des_xmlParserInputBufferPtr(n_input, input, 1); des_xmlCharEncoding(n_enc, enc, 2); xmlResetLastError(); - if (mem_base != xmlMemBlocks()) { - printf("Leak of %d blocks found in xmlIOParseDTD", - xmlMemBlocks() - mem_base); - test_ret++; - printf(" %d", n_sax); - printf(" %d", n_input); - printf(" %d", n_enc); - printf("\n"); - } } } }