From 4255d504151db75c17f85192ce74f45dd2d65533 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Tue, 16 Apr 2002 15:50:10 +0000 Subject: [PATCH] merged the current state of XML Schemas implementation, it is not * Makefile.am TODO_SCHEMAS configure.in genUnicode.py testAutomata.c testRegexp.c testSchemas.c xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c include/libxml/Makefile.am include/libxml/schemasInternals.h include/libxml/xmlautomata.h include/libxml/xmlregexp.h include/libxml/xmlschemas.h include/libxml/xmlschemastypes.h include/libxml/xmlunicode.h include/libxml/xmlversion.h.in : merged the current state of XML Schemas implementation, it is not configured in by default, a specific --schemas configure option has been added. * test/automata test/regexp test/schemas Makefile.am result/automata result/regexp result/schemas: merged automata/regexp/schemas regression tests Daniel --- ChangeLog | 15 + Makefile.am | 93 +- TODO_SCHEMAS | 31 + configure.in | 15 + genUnicode.py | 256 ++ include/libxml/Makefile.am | 8 +- include/libxml/schemasInternals.h | 275 ++ include/libxml/xmlautomata.h | 75 + include/libxml/xmlregexp.h | 77 + include/libxml/xmlschemas.h | 105 + include/libxml/xmlschemastypes.h | 45 + include/libxml/xmlunicode.h | 164 + include/libxml/xmlversion.h.in | 36 + result/automata/a | 4 + result/automata/aba | 6 + result/automata/abaa | 5 + result/automata/abba | 4 + result/automata/po | 2 + result/regexp/content | 12 + result/regexp/hard | 7 + result/regexp/ncname | 6 + result/regexp/ranges | 15 + result/regexp/xpath | 32 + result/schemas/po | 0 test/automata/a | 14 + test/automata/aba | 26 + test/automata/abaa | 43 + test/automata/abba | 30 + test/automata/po | 19 + test/regexp/content | 12 + test/regexp/hard | 7 + test/regexp/ncname | 6 + test/regexp/ranges | 15 + test/regexp/xpath | 37 + test/schemas/po.xml | 32 + test/schemas/po.xsd | 59 + testAutomata.c | 306 ++ testRegexp.c | 157 + testSchemas.c | 120 + xmlregexp.c | 3470 
++++++++++++++++++++ xmlschemas.c | 4941 +++++++++++++++++++++++++++++ xmlschemastypes.c | 490 +++ xmlunicode.c | 4290 +++++++++++++++++++++++++ 43 files changed, 15353 insertions(+), 9 deletions(-) create mode 100644 TODO_SCHEMAS create mode 100755 genUnicode.py create mode 100644 include/libxml/schemasInternals.h create mode 100644 include/libxml/xmlautomata.h create mode 100644 include/libxml/xmlregexp.h create mode 100644 include/libxml/xmlschemas.h create mode 100644 include/libxml/xmlschemastypes.h create mode 100644 include/libxml/xmlunicode.h create mode 100644 result/automata/a create mode 100644 result/automata/aba create mode 100644 result/automata/abaa create mode 100644 result/automata/abba create mode 100644 result/automata/po create mode 100644 result/regexp/content create mode 100644 result/regexp/hard create mode 100644 result/regexp/ncname create mode 100644 result/regexp/ranges create mode 100644 result/regexp/xpath create mode 100644 result/schemas/po create mode 100644 test/automata/a create mode 100644 test/automata/aba create mode 100644 test/automata/abaa create mode 100644 test/automata/abba create mode 100644 test/automata/po create mode 100644 test/regexp/content create mode 100644 test/regexp/hard create mode 100644 test/regexp/ncname create mode 100644 test/regexp/ranges create mode 100644 test/regexp/xpath create mode 100644 test/schemas/po.xml create mode 100644 test/schemas/po.xsd create mode 100644 testAutomata.c create mode 100644 testRegexp.c create mode 100644 testSchemas.c create mode 100644 xmlregexp.c create mode 100644 xmlschemas.c create mode 100644 xmlschemastypes.c create mode 100644 xmlunicode.c diff --git a/ChangeLog b/ChangeLog index 3df5ee46..92bf6f57 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +Tue Apr 16 17:46:43 CEST 2002 Daniel Veillard + + * Makefile.am TODO_SCHEMAS configure.in genUnicode.py testAutomata.c + testRegexp.c testSchemas.c xmlregexp.c xmlschemas.c xmlschemastypes.c + xmlunicode.c 
include/libxml/Makefile.am + include/libxml/schemasInternals.h include/libxml/xmlautomata.h + include/libxml/xmlregexp.h include/libxml/xmlschemas.h + include/libxml/xmlschemastypes.h include/libxml/xmlunicode.h + include/libxml/xmlversion.h.in : merged the current state of + XML Schemas implementation, it is not configured in by default, + a specific --schemas configure option has been added. + * test/automata test/regexp test/schemas Makefile.am + result/automata result/regexp result/schemas: + merged automata/regexp/schemas regression tests + Tue Apr 16 09:48:44 CEST 2002 Daniel Veillard * xpath.c: Gary found a compile time problem, fixes #78823 diff --git a/Makefile.am b/Makefile.am index 0755576c..3d90f4d2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,12 +1,12 @@ ## Process this file with automake to produce Makefile.in -#AUTOMAKE_ OPTIONS=no-dependencies +# AUTOMAKE_ OPTIONS=no-dependencies SUBDIRS = include . doc example python INCLUDES = -I@srcdir@/include -I$(top_builddir)/include @THREAD_CFLAGS@ @Z_CFLAGS@ noinst_PROGRAMS=testSAX testHTML testXPath testURI testDocbook testThreads \ - testC14N + testC14N testAutomata testSchemas testRegexp bin_PROGRAMS = xmllint xmlcatalog @@ -22,15 +22,16 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \ parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \ valid.c xlink.c HTMLparser.c HTMLtree.c debugXML.c xpath.c \ xpointer.c xinclude.c nanohttp.c nanoftp.c DOCBparser.c \ - catalog.c globals.c threads.c c14n.c triostr.c trio.c - + catalog.c globals.c threads.c c14n.c \ + xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \ + triostr.c trio.c else libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \ parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \ valid.c xlink.c HTMLparser.c HTMLtree.c debugXML.c xpath.c \ xpointer.c xinclude.c nanohttp.c nanoftp.c DOCBparser.c \ - catalog.c globals.c threads.c c14n.c - + catalog.c globals.c threads.c c14n.c \ +
xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c endif DEPS = $(top_builddir)/libxml2.la @@ -86,11 +87,26 @@ testURI_LDFLAGS = testURI_DEPENDENCIES = $(DEPS) testURI_LDADD= $(LDADDS) +testRegexp_SOURCES=testRegexp.c +testRegexp_LDFLAGS = +testRegexp_DEPENDENCIES = $(DEPS) +testRegexp_LDADD= $(LDADDS) + +testAutomata_SOURCES=testAutomata.c +testAutomata_LDFLAGS = +testAutomata_DEPENDENCIES = $(DEPS) +testAutomata_LDADD= $(LDADDS) + +testSchemas_SOURCES=testSchemas.c +testSchemas_LDFLAGS = +testSchemas_DEPENDENCIES = $(DEPS) +testSchemas_LDADD= $(LDADDS) + check-local: tests testall : tests SVGtests SAXtests -tests: XMLtests XMLenttests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests Scripttests Catatests @TEST_THREADS@ +tests: XMLtests XMLenttests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests Scripttests Catatests @TEST_SCHEMAS@ @TEST_THREADS@ @(cd python ; $(MAKE) tests) valgrind: @@ -557,6 +573,67 @@ Validtests : xmllint$(EXEEXT) rm result.$$name error.$$name ; \ fi ; fi ; done) +Regexptests: testRegexp$(EXEEXT) + @(echo > .memdump) + @echo "##" + @echo "## Regexp regression tests" + @echo "##" + -@(for i in $(srcdir)/test/regexp/* ; do \ + name=`basename $$i`; \ + if [ ! -d $$i ] ; then \ + if [ ! -f $(srcdir)/result/regexp/$$name ] ; then \ + echo New test file $$name ; \ + $(CHECKER) $(top_builddir)/testRegexp -i $$i > $(srcdir)/result/regexp/$$name; \ + else \ + echo Testing $$name ; \ + $(CHECKER) $(top_builddir)/testRegexp -i $$i > result.$$name ; \ + grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ + diff $(srcdir)/result/regexp/$$name result.$$name ; \ + rm result.$$name ; \ + fi ; fi ; done) + +Automatatests: testAutomata$(EXEEXT) + @(echo > .memdump) + @echo "##" + @echo "## Automata regression tests" + @echo "##" + -@(for i in $(srcdir)/test/automata/* ; do \ + name=`basename $$i`; \ + if [ ! -d $$i ] ; then \ + if [ ! 
-f $(srcdir)/result/automata/$$name ] ; then \ + echo New test file $$name ; \ + $(CHECKER) $(top_builddir)/testAutomata $$i > $(srcdir)/result/automata/$$name; \ + else \ + echo Testing $$name ; \ + $(CHECKER) $(top_builddir)/testAutomata $$i > result.$$name ; \ + grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ + diff $(srcdir)/result/automata/$$name result.$$name ; \ + rm result.$$name ; \ + fi ; fi ; done) + + +Schemastests: testSchemas$(EXEEXT) + @(echo > .memdump) + @echo "##" + @echo "## Schemas regression tests" + @echo "##" + -@(for i in $(srcdir)/test/schemas/*.xml ; do \ + name=`basename $$i .xml`; \ + if [ ! -f $(srcdir)/test/schemas/$$name.xsd ] ; then continue ; fi ; \ + schemas="$(srcdir)/test/schemas/$$name.xsd" ; \ + if [ ! -d $$i ] ; then \ + if [ ! -f $(srcdir)/result/schemas/$$name ] ; then \ + echo New test file $$name ; \ + $(CHECKER) $(top_builddir)/testSchemas $$schemas $$i > $(srcdir)/result/schemas/$$name; \ + else \ + echo Testing $$name ; \ + $(CHECKER) $(top_builddir)/testSchemas $$schemas $$i > result.$$name ; \ + grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ + diff $(srcdir)/result/schemas/$$name result.$$name ; \ + rm result.$$name ; \ + fi ; fi ; done) + + dist-hook: libxml.spec -cp libxml.spec $(distdir) (cd $(srcdir) ; tar -cf - --exclude CVS win32 macos vms test result SAXresult ) | (cd $(distdir); tar xf -) @@ -582,7 +659,7 @@ EXTRA_DIST = xml2-config.in xml2Conf.sh.in libxml.spec.in libxml.spec \ $(man_MANS) libxml-2.0.pc.in \ trionan.c trionan.h triostr.c triostr.h trio.c trio.h \ triop.h triodef.h libxml.h \ - testThreadsWin32.c + testThreadsWin32.c genUnicode.py pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = libxml-2.0.pc diff --git a/TODO_SCHEMAS b/TODO_SCHEMAS new file mode 100644 index 00000000..145a4eda --- /dev/null +++ b/TODO_SCHEMAS @@ -0,0 +1,31 @@ +- implement counted transitions at the automata level + +- Unicode: + + upgrade to 3.2 + + improve the python script to generate better 
test + expressions to check the list of ranges. + +- Implement the interface at the SAX level + +- Implement the missing parts in the Structure part + + all content model + + enumerations + + countless others c.f. the TODO scattered in the code + +- Complete the Built-In datatype collections and Facets implementations + +- Regression tests based on + + the primer: + http://www.w3.org/TR/xmlschema-0/ + + the Schemas Test Collection: + http://www.w3.org/2001/05/xmlschema-test-collection/ + + archives of the schemas-dev list + +- Integrity constraints: + + what's that ? I need to read about it + +- "formal" checking, i.e. go through the full Structure spec and + bind code and associated parts of the Schemas spec + +- go through the errata + http://www.w3.org/2001/05/xmlschema-errata diff --git a/configure.in b/configure.in index e6738934..b478f5be 100644 --- a/configure.in +++ b/configure.in @@ -274,6 +274,9 @@ if test "${LOGNAME}" = "veillard" -a "`pwd`" = "/u/veillard/XML" ; then if test "${with_xptr}" = "" ; then with_xptr="yes" fi + if test "${with_schemas}" = "" ; then + with_schemas="yes" + fi CFLAGS="-g -O -pedantic -W -Wunused -Wimplicit -Wreturn-type -Wswitch -Wcomment -Wtrigraphs -Wformat -Wchar-subscripts -Wuninitialized -Wparentheses -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Waggregate-return -Wstrict-prototypes -Wmissing-prototypes -Wnested-externs -Winline -Wredundant-decls " dnl -Wcast-qual -ansi fi @@ -513,6 +516,18 @@ M_LIBS="-lm" XML_LIBS="-lxml2 $Z_LIBS $THREAD_LIBS $ICONV_LIBS $M_LIBS $LIBS" AC_SUBST(WITH_ICONV) +AC_ARG_WITH(schemas, [ --with-schemas Add experimental Schemas support (off)]) +if test "$with_schemas" = "yes" ; then + echo Enabling Schemas support + WITH_SCHEMAS=1 + TEST_SCHEMAS="Regexptests Automatatests Schemastests" +else + WITH_SCHEMAS=0 + TEST_SCHEMAS= +fi +AC_SUBST(WITH_SCHEMAS) +AC_SUBST(TEST_SCHEMAS) + AC_ARG_WITH(debug, [ --with-debug Add the debugging module (on)]) if test "$with_debug" = "no" ; then echo
Disabling DEBUG support diff --git a/genUnicode.py b/genUnicode.py new file mode 100755 index 00000000..c5668fdc --- /dev/null +++ b/genUnicode.py @@ -0,0 +1,256 @@ +#!/usr/bin/python -u +import sys +import string +import time + +sources = "Blocks-4.txt UnicodeData-3.1.0.txt" + +try: + blocks = open("Blocks-4.txt", "r") +except: + print "Missing Blocks-4.txt, aborting ..." + sys.exit(1) + +BlockNames = {} +for line in blocks.readlines(): + if line[0] == '#': + continue + line = string.strip(line) + if line == '': + continue + try: + fields = string.split(line, ';') + range = string.strip(fields[0]) + (start, end) = string.split(range, "..") + name = string.strip(fields[1]) + name = string.replace(name, ' ', '') + except: + print "Failed to process line: %s" % (line) + continue + BlockNames[name] = ("0x"+start, "0x"+end) +blocks.close() +print "Parsed %d blocks descriptions" % (len(BlockNames.keys())) + +try: + data = open("UnicodeData-3.1.0.txt", "r") +except: + print "Missing UnicodeData-3.1.0.txt, aborting ..." 
+ sys.exit(1) + +nbchar = 0; +Categories = {} +for line in data.readlines(): + if line[0] == '#': + continue + line = string.strip(line) + if line == '': + continue + try: + fields = string.split(line, ';') + point = string.strip(fields[0]) + value = 0 + while point != '': + value = value * 16 + if point[0] >= '0' and point[0] <= '9': + value = value + ord(point[0]) - ord('0') + elif point[0] >= 'A' and point[0] <= 'F': + value = value + 10 + ord(point[0]) - ord('A') + elif point[0] >= 'a' and point[0] <= 'f': + value = value + 10 + ord(point[0]) - ord('a') + point = point[1:] + name = fields[2] + except: + print "Failed to process line: %s" % (line) + continue + + nbchar = nbchar + 1 + try: + Categories[name].append(value) + except: + try: + Categories[name] = [value] + except: + print "Failed to process line: %s" % (line) + try: + Categories[name[0]].append(value) + except: + try: + Categories[name[0]] = [value] + except: + print "Failed to process line: %s" % (line) + +data.close() +print "Parsed %d char generating %d categories" % (nbchar, len(Categories.keys())) +#reduce the number list into ranges +for cat in Categories.keys(): + list = Categories[cat] + start = -1 + prev = -1 + end = -1 + ranges = [] + for val in list: + if start == -1: + start = val + prev = val + continue + elif val == prev + 1: + prev = val + continue + elif prev == start: + ranges.append((prev, prev)) + start = val + prev = val + continue + else: + ranges.append((start, prev)) + start = val + prev = val + continue + if prev == start: + ranges.append((prev, prev)) + else: + ranges.append((start, prev)) + Categories[cat] = ranges + +# +# Generate the resulting files +# +try: + header = open("xmlunicode.h", "w") +except: + print "Failed to open xmlunicode.h" + sys.exit(1) + +try: + output = open("xmlunicode.c", "w") +except: + print "Failed to open xmlunicode.c" + sys.exit(1) + +date = time.asctime(time.localtime(time.time())) + +header.write( +"""/* + * xmlunicode.h: this header exports
interfaces for the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html + * using the genUnicode.py Python script. + * + * Generation date: %s + * Sources: %s + * Daniel Veillard + */ + +#ifndef __XML_UNICODE_H__ +#define __XML_UNICODE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +""" % (date, sources)); +output.write( +"""/* + * xmlunicode.c: this module implements the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html + * using the genUnicode.py Python script. + * + * Generation date: %s + * Sources: %s + * Daniel Veillard + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_UNICODE_ENABLED + +#include +#include +#include + +""" % (date, sources)); + +keys = BlockNames.keys() +keys.sort() +for block in keys: + (start, end) = BlockNames[block] + name = string.replace(block, '-', '') + header.write("int\txmlUCSIs%s\t(int code);\n" % name) + output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name)) + output.write(" *\n * Check whether the character is part of %s UCS Block\n"% + (block)) + output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); + output.write("int\nxmlUCSIs%s(int code) {\n" % name) + output.write(" return((code >= %s) && (code <= %s));\n" % (start, end)) + output.write("}\n\n") + +header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n") +output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n") +output.write(" * @block: UCS block name\n") +output.write(" *\n * Check whether the caracter is part of the UCS Block\n") +output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n"); +output.write("int\nxmlUCSIsBlock(int code, const char 
*block) {\n") +keys = BlockNames.keys() +keys.sort() +for block in keys: + name = string.replace(block, '-', '') + output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" % + (block, name)); +output.write(" return(-1);\n}\n\n") + + +keys = Categories.keys() +keys.sort() +for name in keys: + ranges = Categories[name] + header.write("int\txmlUCSIsCat%s\t(int code);\n" % name) + output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name)) + output.write(" *\n * Check whether the character is part of %s UCS Category\n"% + (name)) + output.write(" *\n * Returns 1 if true 0 otherwise\n */\n"); + output.write("int\nxmlUCSIsCat%s(int code) {\n" % name) + start = 1 + for range in ranges: + (begin, end) = range; + if start: + output.write(" return("); + start = 0 + else: + output.write(" ||\n "); + if (begin == end): + output.write("(code == %s)" % (hex(begin))) + else: + output.write("((code >= %s) && (code <= %s))" % ( + hex(begin), hex(end))) + output.write(");\n}\n\n") + +header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n") +output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n") +output.write(" * @cat: UCS Category name\n") +output.write(" *\n * Check whether the caracter is part of the UCS Category\n") +output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n"); +output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n") +keys = Categories.keys() +keys.sort() +for name in keys: + output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" % + (name, name)); +output.write(" return(-1);\n}\n\n") + +header.write(""" +#ifdef __cplusplus +} +#endif +#endif /* __XML_UNICODE_H__ */ +"""); +output.write(""" +#endif /* LIBXML_UNICODE_ENABLED */ +"""); +header.close() +output.close() diff --git a/include/libxml/Makefile.am b/include/libxml/Makefile.am index e460c886..8d8a3d76 100644 --- a/include/libxml/Makefile.am +++ b/include/libxml/Makefile.am @@ -32,7 +32,13 @@ 
xmlinc_HEADERS = \ catalog.h \ threads.h \ globals.h \ - c14n.h + c14n.h \ + xmlautomata.h \ + xmlregexp.h \ + xmlschemas.h \ + schemasInternals.h \ + xmlschemastypes.h \ + xmlunicode.h install-exec-hook: $(mkinstalldirs) $(DESTDIR)$(xmlincdir) diff --git a/include/libxml/schemasInternals.h b/include/libxml/schemasInternals.h new file mode 100644 index 00000000..1322c4cb --- /dev/null +++ b/include/libxml/schemasInternals.h @@ -0,0 +1,275 @@ +/* + * schemasInternals.h : internal interfaces for the XML Schemas handling + * and schema validity checking + * + * See Copyright for the status of this software. + * + * Daniel.Veillard@w3.org + */ + + +#ifndef __XML_SCHEMA_INTERNALS_H__ +#define __XML_SCHEMA_INTERNALS_H__ + +#if defined(WIN32) && defined(_MSC_VER) +#include +#else +#include +#endif +#ifdef LIBXML_SCHEMAS_ENABLED + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * XML Schemas defines multiple type of types. + */ +typedef enum { + XML_SCHEMA_TYPE_BASIC = 1, + XML_SCHEMA_TYPE_ANY, + XML_SCHEMA_TYPE_FACET, + XML_SCHEMA_TYPE_SIMPLE, + XML_SCHEMA_TYPE_COMPLEX, + XML_SCHEMA_TYPE_SEQUENCE, + XML_SCHEMA_TYPE_CHOICE, + XML_SCHEMA_TYPE_ALL, + XML_SCHEMA_TYPE_SIMPLE_CONTENT, + XML_SCHEMA_TYPE_COMPLEX_CONTENT, + XML_SCHEMA_TYPE_UR, + XML_SCHEMA_TYPE_RESTRICTION, + XML_SCHEMA_TYPE_EXTENSION, + XML_SCHEMA_TYPE_ELEMENT, + XML_SCHEMA_TYPE_ATTRIBUTE, + XML_SCHEMA_TYPE_GROUP, + XML_SCHEMA_TYPE_NOTATION, + XML_SCHEMA_TYPE_LIST, + XML_SCHEMA_TYPE_UNION, + XML_SCHEMA_FACET_MININCLUSIVE = 1000, + XML_SCHEMA_FACET_MINEXCLUSIVE, + XML_SCHEMA_FACET_MAXINCLUSIVE, + XML_SCHEMA_FACET_MAXEXCLUSIVE, + XML_SCHEMA_FACET_TOTALDIGITS, + XML_SCHEMA_FACET_FRACTIONDIGITS, + XML_SCHEMA_FACET_PATTERN, + XML_SCHEMA_FACET_ENUMERATION, + XML_SCHEMA_FACET_WHITESPACE, + XML_SCHEMA_FACET_LENGTH, + XML_SCHEMA_FACET_MAXLENGTH, + XML_SCHEMA_FACET_MINLENGTH +} xmlSchemaTypeType; + +typedef enum { + XML_SCHEMA_CONTENT_UNKNOWN = 0, + XML_SCHEMA_CONTENT_EMPTY = 1, + 
XML_SCHEMA_CONTENT_ELEMENTS, + XML_SCHEMA_CONTENT_MIXED, + XML_SCHEMA_CONTENT_SIMPLE, + XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS, + XML_SCHEMA_CONTENT_BASIC +} xmlSchemaContentType; + +typedef struct _xmlSchemaVal xmlSchemaVal; +typedef xmlSchemaVal *xmlSchemaValPtr; + +typedef struct _xmlSchemaType xmlSchemaType; +typedef xmlSchemaType *xmlSchemaTypePtr; + +typedef struct _xmlSchemaFacet xmlSchemaFacet; +typedef xmlSchemaFacet *xmlSchemaFacetPtr; + +/** + * Annotation + */ +typedef struct _xmlSchemaAnnot xmlSchemaAnnot; +typedef xmlSchemaAnnot *xmlSchemaAnnotPtr; +struct _xmlSchemaAnnot { + struct _xmlSchemaAnnot *next; + xmlNodePtr content; /* the annotation */ +}; + +/** + * An attribute definition. + */ + +#define XML_SCHEMAS_ANYATTR_SKIP 1 +#define XML_SCHEMAS_ANYATTR_LAX 2 +#define XML_SCHEMAS_ANYATTR_STRICT 3 + +typedef struct _xmlSchemaAttribute xmlSchemaAttribute; +typedef xmlSchemaAttribute *xmlSchemaAttributePtr; +struct _xmlSchemaAttribute { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaAttribute *next;/* the next attribute if in a group ... */ + xmlChar *name; + xmlChar *id; + xmlChar *ref; + xmlChar *refNs; + xmlChar *typeName; + xmlChar *typeNs; + xmlSchemaAnnotPtr annot; + + xmlSchemaTypePtr base; + int occurs; + xmlChar *defValue; + xmlSchemaTypePtr subtypes; +}; + +/** + * An attribute group definition. + * + * xmlSchemaAttribute and xmlSchemaAttributeGroup start of structures + * must be kept similar + */ +typedef struct _xmlSchemaAttributeGroup xmlSchemaAttributeGroup; +typedef xmlSchemaAttributeGroup *xmlSchemaAttributeGroupPtr; +struct _xmlSchemaAttributeGroup { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaAttribute *next;/* the next attribute if in a group ... */ + xmlChar *name; + xmlChar *id; + xmlChar *ref; + xmlChar *refNs; + xmlSchemaAnnotPtr annot; + + xmlSchemaAttributePtr attributes; +}; + + +/** + * Schemas type definition. 
+ */ +#define XML_SCHEMAS_TYPE_MIXED (1 << 0) + +struct _xmlSchemaType { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaType *next;/* the next type if in a sequence ... */ + xmlChar *name; + xmlChar *id; + xmlChar *ref; + xmlChar *refNs; + xmlSchemaAnnotPtr annot; + xmlSchemaTypePtr subtypes; + xmlSchemaAttributePtr attributes; + xmlNodePtr node; + int minOccurs; + int maxOccurs; + + int flags; + xmlSchemaContentType contentType; + xmlChar *base; + xmlChar *baseNs; + xmlSchemaTypePtr baseType; + xmlSchemaFacetPtr facets; +}; + +/** + * An element definition. + * + * xmlSchemaType, xmlSchemaFacet and xmlSchemaElement start of + * structures must be kept similar + */ +#define XML_SCHEMAS_ELEM_NILLABLE (1 << 0) +#define XML_SCHEMAS_ELEM_GLOBAL (1 << 1) +#define XML_SCHEMAS_ELEM_DEFAULT (1 << 2) +#define XML_SCHEMAS_ELEM_FIXED (1 << 3) +#define XML_SCHEMAS_ELEM_ABSTRACT (1 << 4) +#define XML_SCHEMAS_ELEM_TOPLEVEL (1 << 5) +#define XML_SCHEMAS_ELEM_REF (1 << 6) + +typedef struct _xmlSchemaElement xmlSchemaElement; +typedef xmlSchemaElement *xmlSchemaElementPtr; +struct _xmlSchemaElement { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaType *next;/* the next type if in a sequence ... */ + xmlChar *name; + xmlChar *id; + xmlChar *ref; + xmlChar *refNs; + xmlSchemaAnnotPtr annot; + xmlSchemaTypePtr subtypes; + xmlSchemaAttributePtr attributes; + xmlNodePtr node; + int minOccurs; + int maxOccurs; + + int flags; + xmlChar *targetNamespace; + xmlChar *namedType; + xmlChar *namedTypeNs; + xmlChar *substGroup; + xmlChar *substGroupNs; + xmlChar *scope; + xmlChar *value; + struct _xmlSchemaElement *refDecl; + xmlRegexpPtr contModel; +}; + +/** + * A facet definition.
+ * + */ +#define XML_SCHEMAS_FACET_UNKNOWN 0 +#define XML_SCHEMAS_FACET_PRESERVE 1 +#define XML_SCHEMAS_FACET_REPLACE 2 +#define XML_SCHEMAS_FACET_COLLAPSE 3 + +struct _xmlSchemaFacet { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaFacet *next;/* the next type if in a sequence ... */ + xmlChar *value; + xmlChar *id; + xmlSchemaAnnotPtr annot; + xmlNodePtr node; + int fixed; + int whitespace; + xmlSchemaValPtr val; + xmlRegexpPtr regexp; +}; + +/** + * A notation definition. + */ +typedef struct _xmlSchemaNotation xmlSchemaNotation; +typedef xmlSchemaNotation *xmlSchemaNotationPtr; +struct _xmlSchemaNotation { + xmlSchemaTypeType type; /* The kind of type */ + xmlChar *name; + xmlSchemaAnnotPtr annot; + xmlChar *identifier; +}; + +/** + * A Schemas definition + */ +#define XML_SCHEMAS_QUALIF_ELEM (1 << 0) +#define XML_SCHEMAS_QUALIF_ATTR (1 << 1) +struct _xmlSchema { + xmlChar *name; /* schema name */ + xmlChar *targetNamespace; /* the target namespace */ + xmlChar *version; + xmlChar *id; + xmlDocPtr doc; + xmlSchemaAnnotPtr annot; + int flags; + + xmlHashTablePtr typeDecl; + xmlHashTablePtr attrDecl; + xmlHashTablePtr attrgrpDecl; + xmlHashTablePtr elemDecl; + xmlHashTablePtr notaDecl; +}; + +void xmlSchemaFreeType (xmlSchemaTypePtr type); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_INTERNALS_H__ */ + + diff --git a/include/libxml/xmlautomata.h b/include/libxml/xmlautomata.h new file mode 100644 index 00000000..ad3acf7d --- /dev/null +++ b/include/libxml/xmlautomata.h @@ -0,0 +1,75 @@ +/* + * xmlautomata.h : description of the API to build regexp automata + * + * See Copyright for the status of this software.
+ * + * Daniel Veillard + */ + +#ifndef __XML_AUTOMATA_H__ +#define __XML_AUTOMATA_H__ + +#if defined(WIN32) && defined(_MSC_VER) +#include +#else +#include +#endif +#ifdef LIBXML_AUTOMATA_ENABLED + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlAutomataPtr: + * + * A libxml automata description. It can be compiled into a regexp + */ +typedef struct _xmlAutomata xmlAutomata; +typedef xmlAutomata *xmlAutomataPtr; + +/** + * xmlAutomataStatePtr: + * + * A state in the automata description. + */ +typedef struct _xmlAutomataState xmlAutomataState; +typedef xmlAutomataState *xmlAutomataStatePtr; + +/* + * Building API + */ +xmlAutomataPtr xmlNewAutomata (void); +void xmlFreeAutomata (xmlAutomataPtr am); + +xmlAutomataStatePtr xmlAutomataGetInitState (xmlAutomataPtr am); +int xmlAutomataSetFinalState(xmlAutomataPtr am, + xmlAutomataStatePtr state); +xmlAutomataStatePtr xmlAutomataNewState (xmlAutomataPtr am); +xmlAutomataStatePtr xmlAutomataNewTransition(xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + void *data); +xmlAutomataStatePtr xmlAutomataNewCountTrans(xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + int min, + int max, + void *data); +xmlAutomataStatePtr xmlAutomataNewEpsilon (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to); +int xmlAutomataNewCounter (xmlAutomataPtr am); + +xmlRegexpPtr xmlAutomataCompile (xmlAutomataPtr am); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_AUTOMATA_ENABLED */ +#endif /* __XML_AUTOMATA_H__ */ diff --git a/include/libxml/xmlregexp.h b/include/libxml/xmlregexp.h new file mode 100644 index 00000000..e4b9afe6 --- /dev/null +++ b/include/libxml/xmlregexp.h @@ -0,0 +1,77 @@ +/* + * xmlregexp.h : describes the basic API for libxml regular expressions handling + * + * See Copyright for the status of this software.
+ * + * Daniel Veillard + */ + +#ifndef __XML_REGEXP_H__ +#define __XML_REGEXP_H__ + +#if defined(WIN32) && defined(_MSC_VER) +#include +#else +#include +#endif +#ifdef LIBXML_REGEXP_ENABLED + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlRegexpPtr: + * + * A libxml regular expression; these can actually be far more complex + * than POSIX regular expressions. + */ +typedef struct _xmlRegexp xmlRegexp; +typedef xmlRegexp *xmlRegexpPtr; + +/** + * xmlRegExecCtxtPtr: + * + * A libxml progressive regular expression evaluation context + */ +typedef struct _xmlRegExecCtxt xmlRegExecCtxt; +typedef xmlRegExecCtxt *xmlRegExecCtxtPtr; + +/* + * The POSIX like API + */ +xmlRegexpPtr xmlRegexpCompile(const xmlChar *regexp); +void xmlRegFreeRegexp(xmlRegexpPtr regexp); +int xmlRegexpExec (xmlRegexpPtr comp, + const xmlChar *value); +void xmlRegexpPrint (FILE *output, + xmlRegexpPtr regexp); + +/* + * Callback function when doing a transition in the automata + */ +typedef void (*xmlRegExecCallbacks) (xmlRegExecCtxtPtr exec, + const xmlChar *token, + void *transdata, + void *inputdata); + +/* + * The progressive API + */ +xmlRegExecCtxtPtr xmlRegNewExecCtxt (xmlRegexpPtr comp, + xmlRegExecCallbacks callback, + void *data); +void xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec); +int xmlRegExecPushString (xmlRegExecCtxtPtr exec, + const xmlChar *value, + void *data); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_REGEXP_ENABLED */ + +#endif /*__XML_REGEXP_H__ */ diff --git a/include/libxml/xmlschemas.h b/include/libxml/xmlschemas.h new file mode 100644 index 00000000..58bf645a --- /dev/null +++ b/include/libxml/xmlschemas.h @@ -0,0 +1,105 @@ +/* + * xmlschemas.h : interface to the XML Schemas handling and schema validity + * checking + * + * See Copyright for the status of this software.
+ * + * Daniel.Veillard@w3.org + */ + + +#ifndef __XML_SCHEMA_H__ +#define __XML_SCHEMA_H__ + +#if defined(WIN32) && defined(_MSC_VER) +#include +#else +#include +#endif +#ifdef LIBXML_SCHEMAS_ENABLED + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + XML_SCHEMAS_ERR_OK = 0, + XML_SCHEMAS_ERR_NOROOT = 1, + XML_SCHEMAS_ERR_UNDECLAREDELEM, + XML_SCHEMAS_ERR_NOTTOPLEVEL, + XML_SCHEMAS_ERR_MISSING, + XML_SCHEMAS_ERR_WRONGELEM, + XML_SCHEMAS_ERR_NOTYPE, + XML_SCHEMAS_ERR_NOROLLBACK, + XML_SCHEMAS_ERR_ISABSTRACT, + XML_SCHEMAS_ERR_NOTEMPTY, + XML_SCHEMAS_ERR_HAVEDEFAULT, + XML_SCHEMAS_ERR_NOTNILLABLE, + XML_SCHEMAS_ERR_EXTRACONTENT, + XML_SCHEMAS_ERR_INVALIDATTR, + XML_SCHEMAS_ERR_INVALIDELEM, + XML_SCHEMAS_ERR_CONSTRUCT, + XML_SCHEMAS_ERR_INTERNAL, + XML_SCHEMAS_ERR_NOTSIMPLE, + XML_SCHEMAS_ERR_ATTRUNKNOWN, + XML_SCHEMAS_ERR_ATTRINVALID, + XML_SCHEMAS_ERR_, + XML_SCHEMAS_ERR_XXX +} xmlSchemaValidError; + + +/** + * The schemas related types are kept internal + */ +typedef struct _xmlSchema xmlSchema; +typedef xmlSchema *xmlSchemaPtr; + +/** + * A schemas validation context + */ +typedef void (*xmlSchemaValidityErrorFunc) (void *ctx, const char *msg, ...); +typedef void (*xmlSchemaValidityWarningFunc) (void *ctx, const char *msg, ...); + +typedef struct _xmlSchemaParserCtxt xmlSchemaParserCtxt; +typedef xmlSchemaParserCtxt *xmlSchemaParserCtxtPtr; + +typedef struct _xmlSchemaValidCtxt xmlSchemaValidCtxt; +typedef xmlSchemaValidCtxt *xmlSchemaValidCtxtPtr; + +/* + * Interfaces for parsing. 
+ */ +xmlSchemaParserCtxtPtr xmlSchemaNewParserCtxt(const char *URL); +void xmlSchemaFreeParserCtxt (xmlSchemaParserCtxtPtr ctxt); +void xmlSchemaSetParserErrors(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, + void *ctx); +xmlSchemaPtr xmlSchemaParse (xmlSchemaParserCtxtPtr ctxt); +void xmlSchemaFree (xmlSchemaPtr schema); +void xmlSchemaDump (FILE *output, + xmlSchemaPtr schema); +/* + * Interfaces for validating + */ +void xmlSchemaSetValidErrors (xmlSchemaValidCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, + void *ctx); +xmlSchemaValidCtxtPtr xmlSchemaNewValidCtxt (xmlSchemaPtr schema); +void xmlSchemaFreeValidCtxt (xmlSchemaValidCtxtPtr ctxt); +int xmlSchemaValidateDoc (xmlSchemaValidCtxtPtr ctxt, + xmlDocPtr instance); +int xmlSchemaValidateStream (xmlSchemaValidCtxtPtr ctxt, + xmlParserInputBufferPtr input, + xmlCharEncoding enc, + xmlSAXHandlerPtr sax, + void *user_data); +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_H__ */ diff --git a/include/libxml/xmlschemastypes.h b/include/libxml/xmlschemastypes.h new file mode 100644 index 00000000..b1788da6 --- /dev/null +++ b/include/libxml/xmlschemastypes.h @@ -0,0 +1,45 @@ +/* + * schemastypes.c : interface of the XML Schema Datatypes + * definition and validity checking + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard + */ + + +#ifndef __XML_SCHEMA_TYPES_H__ +#define __XML_SCHEMA_TYPES_H__ + +#if defined(WIN32) && defined(_MSC_VER) +#include +#else +#include +#endif +#ifdef LIBXML_SCHEMAS_ENABLED + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void xmlSchemaInitTypes (void); +void xmlSchemaCleanupTypes (void); +xmlSchemaTypePtr xmlSchemaGetPredefinedType (const xmlChar *name, + const xmlChar *ns); +int xmlSchemaValidatePredefinedType (xmlSchemaTypePtr type, + const xmlChar *value, + xmlSchemaValPtr *val); +int xmlSchemaValidateFacet (xmlSchemaTypePtr base, + xmlSchemaFacetPtr facet, + const xmlChar *value, + xmlSchemaValPtr val); +void xmlSchemaFreeValue (xmlSchemaValPtr val); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_TYPES_H__ */ diff --git a/include/libxml/xmlunicode.h b/include/libxml/xmlunicode.h new file mode 100644 index 00000000..f0f1fe9c --- /dev/null +++ b/include/libxml/xmlunicode.h @@ -0,0 +1,164 @@ +/* + * xmlunicode.h: this header exports interfaces for the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html + * using the genUnicode.py Python script. 
+ * + * Generation date: Tue Apr 16 17:28:05 2002 + * Sources: Blocks-4.txt UnicodeData-3.1.0.txt + * Daniel Veillard + */ + +#ifndef __XML_UNICODE_H__ +#define __XML_UNICODE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +int xmlUCSIsAlphabeticPresentationForms (int code); +int xmlUCSIsArabic (int code); +int xmlUCSIsArabicPresentationFormsA (int code); +int xmlUCSIsArabicPresentationFormsB (int code); +int xmlUCSIsArmenian (int code); +int xmlUCSIsArrows (int code); +int xmlUCSIsBasicLatin (int code); +int xmlUCSIsBengali (int code); +int xmlUCSIsBlockElements (int code); +int xmlUCSIsBopomofo (int code); +int xmlUCSIsBopomofoExtended (int code); +int xmlUCSIsBoxDrawing (int code); +int xmlUCSIsBraillePatterns (int code); +int xmlUCSIsByzantineMusicalSymbols (int code); +int xmlUCSIsCJKCompatibility (int code); +int xmlUCSIsCJKCompatibilityForms (int code); +int xmlUCSIsCJKCompatibilityIdeographs (int code); +int xmlUCSIsCJKCompatibilityIdeographsSupplement (int code); +int xmlUCSIsCJKRadicalsSupplement (int code); +int xmlUCSIsCJKSymbolsandPunctuation (int code); +int xmlUCSIsCJKUnifiedIdeographs (int code); +int xmlUCSIsCJKUnifiedIdeographsExtensionA (int code); +int xmlUCSIsCJKUnifiedIdeographsExtensionB (int code); +int xmlUCSIsCherokee (int code); +int xmlUCSIsCombiningDiacriticalMarks (int code); +int xmlUCSIsCombiningHalfMarks (int code); +int xmlUCSIsCombiningMarksforSymbols (int code); +int xmlUCSIsControlPictures (int code); +int xmlUCSIsCurrencySymbols (int code); +int xmlUCSIsCyrillic (int code); +int xmlUCSIsDeseret (int code); +int xmlUCSIsDevanagari (int code); +int xmlUCSIsDingbats (int code); +int xmlUCSIsEnclosedAlphanumerics (int code); +int xmlUCSIsEnclosedCJKLettersandMonths (int code); +int xmlUCSIsEthiopic (int code); +int xmlUCSIsGeneralPunctuation (int code); +int xmlUCSIsGeometricShapes (int code); +int xmlUCSIsGeorgian (int code); +int xmlUCSIsGothic (int code); +int xmlUCSIsGreek (int code); +int xmlUCSIsGreekExtended (int code); 
+int xmlUCSIsGujarati (int code); +int xmlUCSIsGurmukhi (int code); +int xmlUCSIsHalfwidthandFullwidthForms (int code); +int xmlUCSIsHangulCompatibilityJamo (int code); +int xmlUCSIsHangulJamo (int code); +int xmlUCSIsHangulSyllables (int code); +int xmlUCSIsHebrew (int code); +int xmlUCSIsHighPrivateUseSurrogates (int code); +int xmlUCSIsHighSurrogates (int code); +int xmlUCSIsHiragana (int code); +int xmlUCSIsIPAExtensions (int code); +int xmlUCSIsIdeographicDescriptionCharacters (int code); +int xmlUCSIsKanbun (int code); +int xmlUCSIsKangxiRadicals (int code); +int xmlUCSIsKannada (int code); +int xmlUCSIsKatakana (int code); +int xmlUCSIsKhmer (int code); +int xmlUCSIsLao (int code); +int xmlUCSIsLatin1Supplement (int code); +int xmlUCSIsLatinExtendedA (int code); +int xmlUCSIsLatinExtendedB (int code); +int xmlUCSIsLatinExtendedAdditional (int code); +int xmlUCSIsLetterlikeSymbols (int code); +int xmlUCSIsLowSurrogates (int code); +int xmlUCSIsMalayalam (int code); +int xmlUCSIsMathematicalAlphanumericSymbols (int code); +int xmlUCSIsMathematicalOperators (int code); +int xmlUCSIsMiscellaneousSymbols (int code); +int xmlUCSIsMiscellaneousTechnical (int code); +int xmlUCSIsMongolian (int code); +int xmlUCSIsMusicalSymbols (int code); +int xmlUCSIsMyanmar (int code); +int xmlUCSIsNumberForms (int code); +int xmlUCSIsOgham (int code); +int xmlUCSIsOldItalic (int code); +int xmlUCSIsOpticalCharacterRecognition (int code); +int xmlUCSIsOriya (int code); +int xmlUCSIsPrivateUse (int code); +int xmlUCSIsRunic (int code); +int xmlUCSIsSinhala (int code); +int xmlUCSIsSmallFormVariants (int code); +int xmlUCSIsSpacingModifierLetters (int code); +int xmlUCSIsSpecials (int code); +int xmlUCSIsSuperscriptsandSubscripts (int code); +int xmlUCSIsSyriac (int code); +int xmlUCSIsTags (int code); +int xmlUCSIsTamil (int code); +int xmlUCSIsTelugu (int code); +int xmlUCSIsThaana (int code); +int xmlUCSIsThai (int code); +int xmlUCSIsTibetan (int code); +int 
xmlUCSIsUnifiedCanadianAboriginalSyllabics (int code); +int xmlUCSIsYiRadicals (int code); +int xmlUCSIsYiSyllables (int code); + +int xmlUCSIsBlock (int code, + const char *block); + +int xmlUCSIsCatC (int code); +int xmlUCSIsCatCc (int code); +int xmlUCSIsCatCf (int code); +int xmlUCSIsCatCo (int code); +int xmlUCSIsCatCs (int code); +int xmlUCSIsCatL (int code); +int xmlUCSIsCatLl (int code); +int xmlUCSIsCatLm (int code); +int xmlUCSIsCatLo (int code); +int xmlUCSIsCatLt (int code); +int xmlUCSIsCatLu (int code); +int xmlUCSIsCatM (int code); +int xmlUCSIsCatMc (int code); +int xmlUCSIsCatMe (int code); +int xmlUCSIsCatMn (int code); +int xmlUCSIsCatN (int code); +int xmlUCSIsCatNd (int code); +int xmlUCSIsCatNl (int code); +int xmlUCSIsCatNo (int code); +int xmlUCSIsCatP (int code); +int xmlUCSIsCatPc (int code); +int xmlUCSIsCatPd (int code); +int xmlUCSIsCatPe (int code); +int xmlUCSIsCatPf (int code); +int xmlUCSIsCatPi (int code); +int xmlUCSIsCatPo (int code); +int xmlUCSIsCatPs (int code); +int xmlUCSIsCatS (int code); +int xmlUCSIsCatSc (int code); +int xmlUCSIsCatSk (int code); +int xmlUCSIsCatSm (int code); +int xmlUCSIsCatSo (int code); +int xmlUCSIsCatZ (int code); +int xmlUCSIsCatZl (int code); +int xmlUCSIsCatZp (int code); +int xmlUCSIsCatZs (int code); + +int xmlUCSIsCat (int code, + const char *cat); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_UNICODE_H__ */ diff --git a/include/libxml/xmlversion.h.in b/include/libxml/xmlversion.h.in index d99c58af..777e9f73 100644 --- a/include/libxml/xmlversion.h.in +++ b/include/libxml/xmlversion.h.in @@ -189,6 +189,42 @@ extern void xmlCheckVersion(int version); #define DEBUG_MEMORY_LOCATION #endif +/** + * LIBXML_UNICODE_ENABLED + * + * Whether the Unicode related interfaces are compiled in + */ +#if @WITH_SCHEMAS@ +#define LIBXML_UNICODE_ENABLED +#endif + +/** + * LIBXML_REGEXP_ENABLED + * + * Whether the regular expressions interfaces are compiled in + */ +#if @WITH_SCHEMAS@ +#define 
LIBXML_REGEXP_ENABLED +#endif + +/** + * LIBXML_AUTOMATA_ENABLED + * + * Whether the automata interfaces are compiled in + */ +#if @WITH_SCHEMAS@ +#define LIBXML_AUTOMATA_ENABLED +#endif + +/** + * LIBXML_SCHEMAS_ENABLED + * + * Whether the Schemas validation interfaces are compiled in + */ +#if @WITH_SCHEMAS@ +#define LIBXML_SCHEMAS_ENABLED +#endif + /** * LIBXML_DLL_IMPORT: * diff --git a/result/automata/a b/result/automata/a new file mode 100644 index 00000000..4ece4115 --- /dev/null +++ b/result/automata/a @@ -0,0 +1,4 @@ +=> Passed +=> Failed +=> Failed +=> Failed diff --git a/result/automata/aba b/result/automata/aba new file mode 100644 index 00000000..051b9bd4 --- /dev/null +++ b/result/automata/aba @@ -0,0 +1,6 @@ +=> Passed +=> Passed +=> Passed +=> Failed +=> Failed +=> Failed diff --git a/result/automata/abaa b/result/automata/abaa new file mode 100644 index 00000000..c74769e3 --- /dev/null +++ b/result/automata/abaa @@ -0,0 +1,5 @@ +=> Passed +=> Passed +=> Passed +=> Failed +=> Failed diff --git a/result/automata/abba b/result/automata/abba new file mode 100644 index 00000000..1a608488 --- /dev/null +++ b/result/automata/abba @@ -0,0 +1,4 @@ +=> Passed +=> Passed +=> Failed +=> Failed diff --git a/result/automata/po b/result/automata/po new file mode 100644 index 00000000..fafcae55 --- /dev/null +++ b/result/automata/po @@ -0,0 +1,2 @@ +=> Passed +=> Passed diff --git a/result/regexp/content b/result/regexp/content new file mode 100644 index 00000000..220fd476 --- /dev/null +++ b/result/regexp/content @@ -0,0 +1,12 @@ +Regexp: ((a|b|c)def) +adef: Ok +bdef: Ok +adefg: Fail +aaef: Fail +Regexp: ((a|b|c|d|e|f)?(g|h|i)+(k|l)*) +g: Ok +gi: Ok +fil: Ok +gikl: Ok +cghhhiill: Ok +ak: Fail diff --git a/result/regexp/hard b/result/regexp/hard new file mode 100644 index 00000000..f348c08d --- /dev/null +++ b/result/regexp/hard @@ -0,0 +1,7 @@ +Regexp: ((a|b|\p{Nd}){1,2}|aaa|bbbb){1,2} +bab: Ok +aaca: Fail +aaabbbb: Ok +a0b: Ok +aa0aaa: Fail +b0aaa: Ok diff --git 
a/result/regexp/ncname b/result/regexp/ncname new file mode 100644 index 00000000..3f16d952 --- /dev/null +++ b/result/regexp/ncname @@ -0,0 +1,6 @@ +Regexp: [\i-[:]][\c-[:]]* +a: Ok +abc: Ok +abc1d: Ok +1ac: Fail +a1b:c: Fail diff --git a/result/regexp/ranges b/result/regexp/ranges new file mode 100644 index 00000000..4cbf2982 --- /dev/null +++ b/result/regexp/ranges @@ -0,0 +1,15 @@ +Regexp: a{2,3} +a: Fail +aa: Ok +aaa: Ok +aaaa: Fail +Regexp: ba{2,3}c +bac: Fail +baac: Ok +baaac: Ok +baaaac: Fail +Regexp: a(b|c){2,3}d +abcd: Ok +acccd: Ok +abd: Fail +accccd: Fail diff --git a/result/regexp/xpath b/result/regexp/xpath new file mode 100644 index 00000000..4f6b13c3 --- /dev/null +++ b/result/regexp/xpath @@ -0,0 +1,32 @@ +Regexp: (\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*(\|(\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*)* +a: Ok +a12/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b3: Ok +*: Ok +a|b: Ok +.//a:b: Ok +a/b/c: Ok +a/*/b: Ok +a:*/b:*/c:*: Ok +child::a/child::b:*: Ok +child::a/child::b:*|a/*/b|.//a:b: Ok +1: Fail +1ab: Fail +a:1: Ok +@a: Fail +ancestor::a: Ok +Regexp: (\.//)?(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.)(/(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.))*(\|(\.//)?(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.)(/(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.))*)* +a: Ok +a12/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b3: Ok +*: Ok +a|b: Ok +.//a:b: Ok +a/b/c: Ok +a/*/b: Ok +a:*/b:*/c:*: Ok +child::a/child::b:*: Ok +child::a/child::b:*|a/*/b|.//a:b: Ok +1: Fail +1ab: Fail +a:1: Fail +@a: Fail +ancestor::a: Fail diff --git a/result/schemas/po b/result/schemas/po new file mode 100644 index 00000000..e69de29b diff --git a/test/automata/a b/test/automata/a new file mode 100644 index 00000000..e1f3e918 --- /dev/null +++ b/test/automata/a @@ -0,0 +1,14 @@ +# +# tests just "a" +# +t 
0 1 a +f 1 +------- +a +=> +a +a +=> +=> +b +=> diff --git a/test/automata/aba b/test/automata/aba new file mode 100644 index 00000000..ee9a8735 --- /dev/null +++ b/test/automata/aba @@ -0,0 +1,26 @@ +# +# Tests a[ab]* +# +t 0 1 a +t 1 1 a +t 1 1 b +f 1 +------- +a +=> +a +a +=> +a +b +a +b +a +b +=> +b +=> +a +c +=> +=> diff --git a/test/automata/abaa b/test/automata/abaa new file mode 100644 index 00000000..7862ba22 --- /dev/null +++ b/test/automata/abaa @@ -0,0 +1,43 @@ +# +# Tests: a[ab]*a{2,3} +# +t 0 1 a +t 1 1 a +t 1 1 b +c 1 2 2 3 a +f 2 +------- +a +a +a +=> +# Pass +a +b +a +a +=> +# Pass +a +a +a +a +a +a +a +a +a +=> +# Pass +a +b +a +=> +# Fail +a +b +a +a +b +=> +# Fail diff --git a/test/automata/abba b/test/automata/abba new file mode 100644 index 00000000..86c08f13 --- /dev/null +++ b/test/automata/abba @@ -0,0 +1,30 @@ +# +# Tests ab*a with an eliminated epsilon transition +# +t 0 1 a +t 1 2 b +e 1 2 +t 2 2 b +t 2 3 a +f 3 +------- +a +a +=> +# Pass +a +b +b +a +=> +# Pass +a +b +=> +# Fail +a +b +a +b +=> +# Fail diff --git a/test/automata/po b/test/automata/po new file mode 100644 index 00000000..592b8c9d --- /dev/null +++ b/test/automata/po @@ -0,0 +1,19 @@ +# +# purchaseOrder +# +t 0 1 shipTo +t 1 2 billTo +t 2 3 comment +t 3 4 items +e 2 3 +f 4 +------- +shipTo +billTo +comment +items +=> +shipTo +billTo +items +=> diff --git a/test/regexp/content b/test/regexp/content new file mode 100644 index 00000000..9d01c8b2 --- /dev/null +++ b/test/regexp/content @@ -0,0 +1,12 @@ +=>((a|b|c)def) +adef +bdef +adefg +aaef +=>((a|b|c|d|e|f)?(g|h|i)+(k|l)*) +g +gi +fil +gikl +cghhhiill +ak diff --git a/test/regexp/hard b/test/regexp/hard new file mode 100644 index 00000000..40c6d2d4 --- /dev/null +++ b/test/regexp/hard @@ -0,0 +1,7 @@ +=>((a|b|\p{Nd}){1,2}|aaa|bbbb){1,2} +bab +aaca +aaabbbb +a0b +aa0aaa +b0aaa diff --git a/test/regexp/ncname b/test/regexp/ncname new file mode 100644 index 00000000..1e452a89 --- /dev/null +++ b/test/regexp/ncname @@ -0,0 
+1,6 @@ +=>[\i-[:]][\c-[:]]* +a +abc +abc1d +1ac +a1b:c diff --git a/test/regexp/ranges b/test/regexp/ranges new file mode 100644 index 00000000..cb7c22f9 --- /dev/null +++ b/test/regexp/ranges @@ -0,0 +1,15 @@ +=>a{2,3} +a +aa +aaa +aaaa +=>ba{2,3}c +bac +baac +baaac +baaaac +=>a(b|c){2,3}d +abcd +acccd +abd +accccd diff --git a/test/regexp/xpath b/test/regexp/xpath new file mode 100644 index 00000000..62c18c41 --- /dev/null +++ b/test/regexp/xpath @@ -0,0 +1,37 @@ +=>(\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*(\|(\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*)* +a +a12/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b3 +* +a|b +.//a:b +a/b/c +a/*/b +a:*/b:*/c:* +child::a/child::b:* +child::a/child::b:*|a/*/b|.//a:b +1 +1ab +a:1 +@a +ancestor::a +# +# the previous regexp from the Schemas for Schemas was broken +# here is the fixed one: +# http://lists.w3.org/Archives/Public/www-xml-schema-comments/2002AprJun/0005.html +# +=>(\.//)?(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.)(/(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.))*(\|(\.//)?(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.)(/(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.))*)* +a +a12/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b3 +* +a|b +.//a:b +a/b/c +a/*/b +a:*/b:*/c:* +child::a/child::b:* +child::a/child::b:*|a/*/b|.//a:b +1 +1ab +a:1 +@a +ancestor::a diff --git a/test/schemas/po.xml b/test/schemas/po.xml new file mode 100644 index 00000000..387232d1 --- /dev/null +++ b/test/schemas/po.xml @@ -0,0 +1,32 @@ + + + + Alice Smith + 123 Maple Street + Mill Valley + CA + 90952 + + + Robert Smith + 8 Oak Avenue + Old Town + PA + 95819 + + Hurry, my lawn is going wild! 
+ + + Lawnmower + 1 + 148.95 + Confirm this is electric + + + Baby Monitor + 1 + 39.98 + 1999-05-21 + + + diff --git a/test/schemas/po.xsd b/test/schemas/po.xsd new file mode 100644 index 00000000..5a1e6608 --- /dev/null +++ b/test/schemas/po.xsd @@ -0,0 +1,59 @@ + + + + +Purchase order schema for Example.com. + Copyright 2000 Example.com. All rights reserved. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/testAutomata.c b/testAutomata.c new file mode 100644 index 00000000..8e657863 --- /dev/null +++ b/testAutomata.c @@ -0,0 +1,306 @@ +/* + * testRegexp.c: simple module for testing regular expressions + * + * See Copyright for the status of this software. + * + * Daniel Veillard + */ + +#include +#include "libxml.h" +#ifdef LIBXML_AUTOMATA_ENABLED + +#include + +static int scanNumber(char **ptr) { + int ret = 0; + char *cur; + + cur = *ptr; + while ((*cur >= '0') && (*cur <= '9')) { + ret = ret * 10 + (*cur - '0'); + cur++; + } + *ptr = cur; + return(ret); +} + +static void +testRegexpFile(const char *filename) { + FILE *input; + char exp[5000]; + int len; + int ret; + int i; + xmlAutomataPtr am; + xmlAutomataStatePtr states[1000]; + xmlRegexpPtr regexp = NULL; + xmlRegExecCtxtPtr exec; + + for (i = 0;i<1000;i++) + states[i] = NULL; + + input = fopen(filename, "r"); + if (input == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot open %s for reading\n", filename); + return; + } + + am = xmlNewAutomata(); + if (am == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot create automata\n"); + fclose(input); + } + states[0] = xmlAutomataGetInitState(am); + if (states[0] == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot get start state\n"); + xmlFreeAutomata(am); + fclose(input); + } + ret = 0; + + while (fgets(exp, 4500, input) != NULL) { + if (exp[0] == '#') + continue; + len = strlen(exp); + len--; + while ((len >= 0) && + ((exp[len] == '\n') || (exp[len] == '\t') 
|| + (exp[len] == '\r') || (exp[len] == ' '))) len--; + exp[len + 1] = 0; + if (len >= 0) { + if ((am != NULL) && (exp[0] == 't') && (exp[1] == ' ')) { + char *ptr = &exp[2]; + int from, to; + + from = scanNumber(&ptr); + if (*ptr != ' ') { + xmlGenericError(xmlGenericErrorContext, + "Bad line %s\n", exp); + break; + } + if (states[from] == NULL) + states[from] = xmlAutomataNewState(am); + ptr++; + to = scanNumber(&ptr); + if (*ptr != ' ') { + xmlGenericError(xmlGenericErrorContext, + "Bad line %s\n", exp); + break; + } + if (states[to] == NULL) + states[to] = xmlAutomataNewState(am); + ptr++; + xmlAutomataNewTransition(am, states[from], states[to], + BAD_CAST ptr, NULL); + } else if ((am != NULL) && (exp[0] == 'e') && (exp[1] == ' ')) { + char *ptr = &exp[2]; + int from, to; + + from = scanNumber(&ptr); + if (*ptr != ' ') { + xmlGenericError(xmlGenericErrorContext, + "Bad line %s\n", exp); + break; + } + if (states[from] == NULL) + states[from] = xmlAutomataNewState(am); + ptr++; + to = scanNumber(&ptr); + if (states[to] == NULL) + states[to] = xmlAutomataNewState(am); + xmlAutomataNewEpsilon(am, states[from], states[to]); + } else if ((am != NULL) && (exp[0] == 'f') && (exp[1] == ' ')) { + char *ptr = &exp[2]; + int state; + + state = scanNumber(&ptr); + if (states[state] == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Bad state %d : %s\n", state, exp); + break; + } + xmlAutomataSetFinalState(am, states[state]); + } else if ((am != NULL) && (exp[0] == 'c') && (exp[1] == ' ')) { + char *ptr = &exp[2]; + int from, to; + int min, max; + + from = scanNumber(&ptr); + if (*ptr != ' ') { + xmlGenericError(xmlGenericErrorContext, + "Bad line %s\n", exp); + break; + } + if (states[from] == NULL) + states[from] = xmlAutomataNewState(am); + ptr++; + to = scanNumber(&ptr); + if (*ptr != ' ') { + xmlGenericError(xmlGenericErrorContext, + "Bad line %s\n", exp); + break; + } + if (states[to] == NULL) + states[to] = xmlAutomataNewState(am); + ptr++; + min = 
scanNumber(&ptr); + if (*ptr != ' ') { + xmlGenericError(xmlGenericErrorContext, + "Bad line %s\n", exp); + break; + } + ptr++; + max = scanNumber(&ptr); + if (*ptr != ' ') { + xmlGenericError(xmlGenericErrorContext, + "Bad line %s\n", exp); + break; + } + ptr++; + xmlAutomataNewCountTrans(am, states[from], states[to], + BAD_CAST ptr, min, max, NULL); + } else if ((am != NULL) && (exp[0] == '-') && (exp[1] == '-')) { + /* end of the automata */ + regexp = xmlAutomataCompile(am); + xmlFreeAutomata(am); + am = NULL; + if (regexp == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Failed to compile the automata"); + break; + } + } else if ((exp[0] == '=') && (exp[1] == '>')) { + if (regexp == NULL) { + printf("=> failed not compiled\n"); + } else { + if (exec == NULL) + exec = xmlRegNewExecCtxt(regexp, NULL, NULL); + if (ret == 0) { + ret = xmlRegExecPushString(exec, NULL, NULL); + } + if (ret == 1) + printf("=> Passed\n"); + else if ((ret == 0) || (ret == -1)) + printf("=> Failed\n"); + else if (ret < 0) + printf("=> Error\n"); + xmlRegFreeExecCtxt(exec); + exec = NULL; + } + ret = 0; + } else if (regexp != NULL) { + if (exec == NULL) + exec = xmlRegNewExecCtxt(regexp, NULL, NULL); + ret = xmlRegExecPushString(exec, BAD_CAST exp, NULL); + } else { + xmlGenericError(xmlGenericErrorContext, + "Unexpected line %s\n", exp); + } + } + } + fclose(input); + if (regexp != NULL) + xmlRegFreeRegexp(regexp); + if (exec != NULL) + xmlRegFreeExecCtxt(exec); + if (am != NULL) + xmlFreeAutomata(am); +} + +int main(int argc, char **argv) { + + xmlInitMemory(); + + if (argc == 1) { + int ret; + xmlAutomataPtr am; + xmlAutomataStatePtr start, cur; + xmlRegexpPtr regexp; + xmlRegExecCtxtPtr exec; + + am = xmlNewAutomata(); + start = xmlAutomataGetInitState(am); + + /* generate a[ba]*a */ + cur = xmlAutomataNewTransition(am, start, NULL, BAD_CAST"a", NULL); + xmlAutomataNewTransition(am, cur, cur, BAD_CAST"b", NULL); + xmlAutomataNewTransition(am, cur, cur, BAD_CAST"a", NULL); + cur 
= xmlAutomataNewCountTrans(am, cur, NULL, BAD_CAST"a", 2, 3, NULL); + xmlAutomataSetFinalState(am, cur); + + /* compile it in a regexp and free the automata */ + regexp = xmlAutomataCompile(am); + xmlFreeAutomata(am); + + /* test the regexp */ + xmlRegexpPrint(stdout, regexp); + exec = xmlRegNewExecCtxt(regexp, NULL, NULL); + ret = xmlRegExecPushString(exec, BAD_CAST"a", NULL); + if (ret == 1) + printf("final\n"); + else if (ret < 0) + printf("error\n"); + ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); + if (ret == 1) + printf("final\n"); + else if (ret < 0) + printf("error\n"); + ret =xmlRegExecPushString(exec, BAD_CAST"b", NULL); + if (ret == 1) + printf("final\n"); + else if (ret < 0) + printf("error\n"); + ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); + if (ret == 1) + printf("final\n"); + else if (ret < 0) + printf("error\n"); + ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); + if (ret == 1) + printf("final\n"); + else if (ret < 0) + printf("error\n"); + ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); + if (ret == 1) + printf("final\n"); + else if (ret < 0) + printf("error\n"); + ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL); + if (ret == 1) + printf("final\n"); + else if (ret < 0) + printf("error\n"); + if (ret == 0) { + ret = xmlRegExecPushString(exec, NULL, NULL); + if (ret == 1) + printf("final\n"); + else if (ret < 0) + printf("error\n"); + } + xmlRegFreeExecCtxt(exec); + + /* free the regexp */ + xmlRegFreeRegexp(regexp); + } else { + int i; + + for (i = 1;i < argc;i++) + testRegexpFile(argv[i]); + } + + xmlCleanupParser(); + xmlMemoryDump(); + return(0); +} + +#else +#include +int main(int argc, char **argv) { + printf("%s : Automata support not compiled in\n", argv[0]); + return(0); +} +#endif /* LIBXML_AUTOMATA_ENABLED */ diff --git a/testRegexp.c b/testRegexp.c new file mode 100644 index 00000000..a1d0d270 --- /dev/null +++ b/testRegexp.c @@ -0,0 +1,157 @@ +/* + * testRegexp.c: simple module for testing regular expressions + * 
+ * See Copyright for the status of this software. + * + * Daniel Veillard + */ + +#include +#include "libxml.h" +#ifdef LIBXML_REGEXP_ENABLED +#include +#include + +int repeat = 0; +int debug = 0; + +static void testRegexp(xmlRegexpPtr comp, const char *value) { + int ret; + + ret = xmlRegexpExec(comp, (const xmlChar *) value); + if (ret == 1) + printf("%s: Ok\n", value); + else if (ret == 0) + printf("%s: Fail\n", value); + else + printf("%s: Error: %d\n", value, ret); + if (repeat) { + int j; + for (j = 0;j < 999999;j++) + xmlRegexpExec(comp, (const xmlChar *) value); + } +} + +static void +testRegexpFile(const char *filename) { + xmlRegexpPtr comp = NULL; + FILE *input; + char expression[5000]; + int len; + + input = fopen(filename, "r"); + if (input == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot open %s for reading\n", filename); + return; + } + while (fgets(expression, 4500, input) != NULL) { + len = strlen(expression); + len--; + while ((len >= 0) && + ((expression[len] == '\n') || (expression[len] == '\t') || + (expression[len] == '\r') || (expression[len] == ' '))) len--; + expression[len + 1] = 0; + if (len >= 0) { + if (expression[0] == '#') + continue; + if ((expression[0] == '=') && (expression[1] == '>')) { + char *pattern = &expression[2]; + + if (comp != NULL) { + xmlRegFreeRegexp(comp); + comp = NULL; + } + printf("Regexp: %s\n", pattern) ; + comp = xmlRegexpCompile((const xmlChar *) pattern); + if (comp == NULL) { + printf(" failed to compile\n"); + break; + } + } else if (comp == NULL) { + printf("Regexp: %s\n", expression) ; + comp = xmlRegexpCompile((const xmlChar *) expression); + if (comp == NULL) { + printf(" failed to compile\n"); + break; + } + } else if (comp != NULL) { + testRegexp(comp, expression); + } + } + } + fclose(input); + if (comp != NULL) + xmlRegFreeRegexp(comp); +} + + +static void usage(const char *name) { + fprintf(stderr, "Usage: %s\n", name); +} + +int main(int argc, char **argv) { + xmlRegexpPtr comp = 
NULL; + const char *pattern = NULL; + char *filename = NULL; + int i; + + xmlInitMemory(); + + if (argc <= 1) { + usage(argv[0]); + return(1); + } + for (i = 1; i < argc ; i++) { + if (!strcmp(argv[i], "-")) + break; + + if (argv[i][0] != '-') + continue; + if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) { + debug++; + } else if ((!strcmp(argv[i], "-repeat")) || + (!strcmp(argv[i], "--repeat"))) { + repeat++; + } else if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "--input"))) + filename = argv[++i]; + else { + fprintf(stderr, "Unknown option %s\n", argv[i]); + usage(argv[0]); + } + } + if (filename != NULL) { + testRegexpFile(filename); + } else { + for (i = 1; i < argc ; i++) { + if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0)) { + if (pattern == NULL) { + pattern = argv[i]; + printf("Testing %s:\n", pattern); + comp = xmlRegexpCompile((const xmlChar *) pattern); + if (comp == NULL) { + printf(" failed to compile\n"); + break; + } + if (debug) + xmlRegexpPrint(stdout, comp); + } else { + testRegexp(comp, argv[i]); + } + } + } + if (comp != NULL) + xmlRegFreeRegexp(comp); + } + xmlCleanupParser(); + xmlMemoryDump(); + return(0); +} + +#else +#include +int main(int argc, char **argv) { + printf("%s : Regexp support not compiled in\n", argv[0]); + return(0); +} +#endif /* LIBXML_REGEXP_ENABLED */ diff --git a/testSchemas.c b/testSchemas.c new file mode 100644 index 00000000..31681676 --- /dev/null +++ b/testSchemas.c @@ -0,0 +1,120 @@ +/* + * testSchemas.c : a small tester program for Schema validation + * + * See Copyright for the status of this software. 
+ * + * Daniel.Veillard@w3.org + */ + +#include "libxml.h" +#ifdef LIBXML_SCHEMAS_ENABLED + +#include +#include + +#include +#include +#include + + +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif + +#include +#include +#include + +#ifdef LIBXML_DEBUG_ENABLED +static int debug = 0; +#endif +static int noout = 0; + + +int main(int argc, char **argv) { + int i; + int files = 0; + xmlSchemaPtr schema = NULL; + + for (i = 1; i < argc ; i++) { + if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) + debug++; + else + if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout"))) { + noout++; + } + } + xmlLineNumbersDefault(1); + for (i = 1; i < argc ; i++) { + if (argv[i][0] != '-') { + if (schema == NULL) { + xmlSchemaParserCtxtPtr ctxt; + + ctxt = xmlSchemaNewParserCtxt(argv[i]); + xmlSchemaSetParserErrors(ctxt, + (xmlSchemaValidityErrorFunc) fprintf, + (xmlSchemaValidityWarningFunc) fprintf, + stderr); + schema = xmlSchemaParse(ctxt); + xmlSchemaFreeParserCtxt(ctxt); + if (debug) + xmlSchemaDump(stdout, schema); + } else { + xmlDocPtr doc; + + doc = xmlParseFile(argv[i]); + + if (doc == NULL) { + fprintf(stderr, "Could not parse %s\n", argv[i]); + } else { + xmlSchemaValidCtxtPtr ctxt; + int ret; + + ctxt = xmlSchemaNewValidCtxt(schema); + xmlSchemaSetValidErrors(ctxt, + (xmlSchemaValidityErrorFunc) fprintf, + (xmlSchemaValidityWarningFunc) fprintf, + stderr); + ret = xmlSchemaValidateDoc(ctxt, doc); + xmlSchemaFreeValidCtxt(ctxt); + xmlFreeDoc(doc); + } + } + files ++; + } + } + if (schema != NULL) + xmlSchemaFree(schema); + if (files == 0) { + printf("Usage : %s [--debug] [--noout] schemas XMLfiles ...\n", + argv[0]); + printf("\tParse the HTML files and output the result of the parsing\n"); + printf("\t--debug : dump a debug tree of the in-memory document\n"); + printf("\t--noout : do not print 
the result\n"); + } + xmlSchemaCleanupTypes(); + xmlCleanupParser(); + xmlMemoryDump(); + + return(0); +} + +#else +#include +int main(int argc, char **argv) { + printf("%s : Schemas support not compiled in\n", argv[0]); + return(0); +} +#endif /* LIBXML_SCHEMAS_ENABLED */ diff --git a/xmlregexp.c b/xmlregexp.c new file mode 100644 index 00000000..1139e158 --- /dev/null +++ b/xmlregexp.c @@ -0,0 +1,3470 @@ +/* + * regexp.c: generic and extensible Regular Expression engine + * + * Basically designed with the purpose of compiling regexps for + * the variety of validation/shemas mechanisms now available in + * XML related specifications thise includes: + * - XML-1.0 DTD validation + * - XML Schemas structure part 1 + * - XML Schemas Datatypes part 2 especially Appendix F + * - RELAX-NG/TREX i.e. the counter proposal + * + * See Copyright for the status of this software. + * + * Daniel Veillard + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_REGEXP_ENABLED + +#include +#include +#include +#include +#include +#include +#include + +/* #define DEBUG_REGEXP_GRAPH */ +/* #define DEBUG_REGEXP_EXEC */ +/* #define DEBUG_PUSH */ + +#define ERROR(str) ctxt->error = 1; \ + xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur) +#define NEXT ctxt->cur++ +#define CUR (*(ctxt->cur)) +#define NXT(index) (ctxt->cur[index]) + +#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) +#define NEXTL(l) ctxt->cur += l; + + +/************************************************************************ + * * + * Datatypes and structures * + * * + ************************************************************************/ + +typedef enum { + XML_REGEXP_EPSILON = 1, + XML_REGEXP_CHARVAL, + XML_REGEXP_RANGES, + XML_REGEXP_SUBREG, + XML_REGEXP_STRING, + XML_REGEXP_ANYCHAR, /* . 
*/ + XML_REGEXP_ANYSPACE, /* \s */ + XML_REGEXP_NOTSPACE, /* \S */ + XML_REGEXP_INITNAME, /* \l */ + XML_REGEXP_NOTINITNAME, /* \l */ + XML_REGEXP_NAMECHAR, /* \c */ + XML_REGEXP_NOTNAMECHAR, /* \C */ + XML_REGEXP_DECIMAL, /* \d */ + XML_REGEXP_NOTDECIMAL, /* \d */ + XML_REGEXP_REALCHAR, /* \w */ + XML_REGEXP_NOTREALCHAR, /* \w */ + XML_REGEXP_LETTER, + XML_REGEXP_LETTER_UPPERCASE, + XML_REGEXP_LETTER_LOWERCASE, + XML_REGEXP_LETTER_TITLECASE, + XML_REGEXP_LETTER_MODIFIER, + XML_REGEXP_LETTER_OTHERS, + XML_REGEXP_MARK, + XML_REGEXP_MARK_NONSPACING, + XML_REGEXP_MARK_SPACECOMBINING, + XML_REGEXP_MARK_ENCLOSING, + XML_REGEXP_NUMBER, + XML_REGEXP_NUMBER_DECIMAL, + XML_REGEXP_NUMBER_LETTER, + XML_REGEXP_NUMBER_OTHERS, + XML_REGEXP_PUNCT, + XML_REGEXP_PUNCT_CONNECTOR, + XML_REGEXP_PUNCT_DASH, + XML_REGEXP_PUNCT_OPEN, + XML_REGEXP_PUNCT_CLOSE, + XML_REGEXP_PUNCT_INITQUOTE, + XML_REGEXP_PUNCT_FINQUOTE, + XML_REGEXP_PUNCT_OTHERS, + XML_REGEXP_SEPAR, + XML_REGEXP_SEPAR_SPACE, + XML_REGEXP_SEPAR_LINE, + XML_REGEXP_SEPAR_PARA, + XML_REGEXP_SYMBOL, + XML_REGEXP_SYMBOL_MATH, + XML_REGEXP_SYMBOL_CURRENCY, + XML_REGEXP_SYMBOL_MODIFIER, + XML_REGEXP_SYMBOL_OTHERS, + XML_REGEXP_OTHER, + XML_REGEXP_OTHER_CONTROL, + XML_REGEXP_OTHER_FORMAT, + XML_REGEXP_OTHER_PRIVATE, + XML_REGEXP_OTHER_NA, + XML_REGEXP_BLOCK_NAME +} xmlRegAtomType; + +typedef enum { + XML_REGEXP_QUANT_EPSILON = 1, + XML_REGEXP_QUANT_ONCE, + XML_REGEXP_QUANT_OPT, + XML_REGEXP_QUANT_MULT, + XML_REGEXP_QUANT_PLUS, + XML_REGEXP_QUANT_RANGE +} xmlRegQuantType; + +typedef enum { + XML_REGEXP_START_STATE = 1, + XML_REGEXP_FINAL_STATE, + XML_REGEXP_TRANS_STATE +} xmlRegStateType; + +typedef enum { + XML_REGEXP_MARK_NORMAL = 0, + XML_REGEXP_MARK_START, + XML_REGEXP_MARK_VISITED +} xmlRegMarkedType; + +typedef struct _xmlRegRange xmlRegRange; +typedef xmlRegRange *xmlRegRangePtr; + +struct _xmlRegRange { + int neg; + xmlRegAtomType type; + int start; + int end; + xmlChar *blockName; +}; + +typedef struct _xmlRegAtom 
xmlRegAtom; +typedef xmlRegAtom *xmlRegAtomPtr; + +typedef struct _xmlAutomataState xmlRegState; +typedef xmlRegState *xmlRegStatePtr; + +struct _xmlRegAtom { + int no; + xmlRegAtomType type; + xmlRegQuantType quant; + int min; + int max; + + void *valuep; + int neg; + int codepoint; + xmlRegStatePtr start; + xmlRegStatePtr stop; + int maxRanges; + int nbRanges; + xmlRegRangePtr *ranges; + void *data; +}; + +typedef struct _xmlRegCounter xmlRegCounter; +typedef xmlRegCounter *xmlRegCounterPtr; + +struct _xmlRegCounter { + int min; + int max; +}; + +typedef struct _xmlRegTrans xmlRegTrans; +typedef xmlRegTrans *xmlRegTransPtr; + +struct _xmlRegTrans { + xmlRegAtomPtr atom; + int to; + int counter; + int count; +}; + +struct _xmlAutomataState { + xmlRegStateType type; + xmlRegMarkedType mark; + int no; + + int maxTrans; + int nbTrans; + xmlRegTrans *trans; +}; + +typedef struct _xmlAutomata xmlRegParserCtxt; +typedef xmlRegParserCtxt *xmlRegParserCtxtPtr; + +struct _xmlAutomata { + xmlChar *string; + xmlChar *cur; + + int error; + int neg; + + xmlRegStatePtr start; + xmlRegStatePtr end; + xmlRegStatePtr state; + + xmlRegAtomPtr atom; + + int maxAtoms; + int nbAtoms; + xmlRegAtomPtr *atoms; + + int maxStates; + int nbStates; + xmlRegStatePtr *states; + + int maxCounters; + int nbCounters; + xmlRegCounter *counters; +}; + +struct _xmlRegexp { + xmlChar *string; + int nbStates; + xmlRegStatePtr *states; + int nbAtoms; + xmlRegAtomPtr *atoms; + int nbCounters; + xmlRegCounter *counters; +}; + +typedef struct _xmlRegExecRollback xmlRegExecRollback; +typedef xmlRegExecRollback *xmlRegExecRollbackPtr; + +struct _xmlRegExecRollback { + xmlRegStatePtr state;/* the current state */ + int index; /* the index in the input stack */ + int nextbranch; /* the next transition to explore in that state */ + int *counts; /* save the automate state if it has some */ +}; + +typedef struct _xmlRegInputToken xmlRegInputToken; +typedef xmlRegInputToken *xmlRegInputTokenPtr; + +struct 
_xmlRegInputToken { + xmlChar *value; + void *data; +}; + +struct _xmlRegExecCtxt { + int status; /* execution status != 0 indicate an error */ + int determinist; /* did we found an inderterministic behaviour */ + xmlRegexpPtr comp; /* the compiled regexp */ + xmlRegExecCallbacks callback; + void *data; + + xmlRegStatePtr state;/* the current state */ + int transno; /* the current transition on that state */ + int transcount; /* the number of char in char counted transitions */ + + /* + * A stack of rollback states + */ + int maxRollbacks; + int nbRollbacks; + xmlRegExecRollback *rollbacks; + + /* + * The state of the automata if any + */ + int *counts; + + /* + * The input stack + */ + int inputStackMax; + int inputStackNr; + int index; + int *charStack; + const xmlChar *inputString; /* when operating on characters */ + xmlRegInputTokenPtr inputStack;/* when operating on strings */ + +}; + +static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); + +/************************************************************************ + * * + * Allocation/Deallocation * + * * + ************************************************************************/ + +/** + * xmlRegEpxFromParse: + * @ctxt: the parser context used to build it + * + * Allocate a new regexp and fill it with the reult from the parser + * + * Returns the new regexp or NULL in case of error + */ +static xmlRegexpPtr +xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) { + xmlRegexpPtr ret; + + ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp)); + if (ret == NULL) + return(NULL); + memset(ret, 0, sizeof(xmlRegexp)); + ret->string = ctxt->string; + ctxt->string = NULL; + ret->nbStates = ctxt->nbStates; + ctxt->nbStates = 0; + ret->states = ctxt->states; + ctxt->states = NULL; + ret->nbAtoms = ctxt->nbAtoms; + ctxt->nbAtoms = 0; + ret->atoms = ctxt->atoms; + ctxt->atoms = NULL; + ret->nbCounters = ctxt->nbCounters; + ctxt->nbCounters = 0; + ret->counters = ctxt->counters; + ctxt->counters = NULL; + return(ret); +} + 
+/** + * xmlRegNewParserCtxt: + * @string: the string to parse + * + * Allocate a new regexp parser context + * + * Returns the new context or NULL in case of error + */ +static xmlRegParserCtxtPtr +xmlRegNewParserCtxt(const xmlChar *string) { + xmlRegParserCtxtPtr ret; + + ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt)); + if (ret == NULL) + return(NULL); + memset(ret, 0, sizeof(xmlRegParserCtxt)); + if (string != NULL) + ret->string = xmlStrdup(string); + ret->cur = ret->string; + ret->neg = 0; + ret->error = 0; + return(ret); +} + +/** + * xmlRegNewRange: + * @ctxt: the regexp parser context + * @neg: is that negative + * @type: the type of range + * @start: the start codepoint + * @end: the end codepoint + * + * Allocate a new regexp range + * + * Returns the new range or NULL in case of error + */ +static xmlRegRangePtr +xmlRegNewRange(xmlRegParserCtxtPtr ctxt, + int neg, xmlRegAtomType type, int start, int end) { + xmlRegRangePtr ret; + + ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange)); + if (ret == NULL) { + ERROR("failed to allocate regexp range"); + return(NULL); + } + ret->neg = neg; + ret->type = type; + ret->start = start; + ret->end = end; + return(ret); +} + +/** + * xmlRegFreeRange: + * @range: the regexp range + * + * Free a regexp range + */ +static void +xmlRegFreeRange(xmlRegRangePtr range) { + if (range == NULL) + return; + + if (range->blockName != NULL) + xmlFree(range->blockName); + xmlFree(range); +} + +/** + * xmlRegNewAtom: + * @ctxt: the regexp parser context + * @type: the type of atom + * + * Allocate a new regexp range + * + * Returns the new atom or NULL in case of error + */ +static xmlRegAtomPtr +xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) { + xmlRegAtomPtr ret; + + ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom)); + if (ret == NULL) { + ERROR("failed to allocate regexp atom"); + return(NULL); + } + memset(ret, 0, sizeof(xmlRegAtom)); + ret->type = type; + ret->quant = XML_REGEXP_QUANT_ONCE; 
+ ret->min = 0; + ret->max = 0; + return(ret); +} + +/** + * xmlRegFreeAtom: + * @atom: the regexp atom + * + * Free a regexp atom + */ +static void +xmlRegFreeAtom(xmlRegAtomPtr atom) { + int i; + + if (atom == NULL) + return; + + for (i = 0;i < atom->nbRanges;i++) + xmlRegFreeRange(atom->ranges[i]); + if (atom->ranges != NULL) + xmlFree(atom->ranges); + if (atom->type == XML_REGEXP_STRING) + xmlFree(atom->valuep); + xmlFree(atom); +} + +static xmlRegStatePtr +xmlRegNewState(xmlRegParserCtxtPtr ctxt) { + xmlRegStatePtr ret; + + ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState)); + if (ret == NULL) { + ERROR("failed to allocate regexp state"); + return(NULL); + } + memset(ret, 0, sizeof(xmlRegState)); + ret->type = XML_REGEXP_TRANS_STATE; + ret->mark = XML_REGEXP_MARK_NORMAL; + return(ret); +} + +/** + * xmlRegFreeState: + * @state: the regexp state + * + * Free a regexp state + */ +static void +xmlRegFreeState(xmlRegStatePtr state) { + if (state == NULL) + return; + + if (state->trans != NULL) + xmlFree(state->trans); + xmlFree(state); +} + +/** + * xmlRegFreeParserCtxt: + * @ctxt: the regexp parser context + * + * Free a regexp parser context + */ +static void +xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) { + int i; + if (ctxt == NULL) + return; + + if (ctxt->string != NULL) + xmlFree(ctxt->string); + if (ctxt->states != NULL) { + for (i = 0;i < ctxt->nbStates;i++) + xmlRegFreeState(ctxt->states[i]); + xmlFree(ctxt->states); + } + if (ctxt->atoms != NULL) { + for (i = 0;i < ctxt->nbAtoms;i++) + xmlRegFreeAtom(ctxt->atoms[i]); + xmlFree(ctxt->atoms); + } + if (ctxt->counters != NULL) + xmlFree(ctxt->counters); + xmlFree(ctxt); +} + +/************************************************************************ + * * + * Display of Data structures * + * * + ************************************************************************/ + +static void +xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { + switch (type) { + case XML_REGEXP_EPSILON: + fprintf(output, 
"epsilon "); break; + case XML_REGEXP_CHARVAL: + fprintf(output, "charval "); break; + case XML_REGEXP_RANGES: + fprintf(output, "ranges "); break; + case XML_REGEXP_SUBREG: + fprintf(output, "subexpr "); break; + case XML_REGEXP_STRING: + fprintf(output, "string "); break; + case XML_REGEXP_ANYCHAR: + fprintf(output, "anychar "); break; + case XML_REGEXP_ANYSPACE: + fprintf(output, "anyspace "); break; + case XML_REGEXP_NOTSPACE: + fprintf(output, "notspace "); break; + case XML_REGEXP_INITNAME: + fprintf(output, "initname "); break; + case XML_REGEXP_NOTINITNAME: + fprintf(output, "notinitname "); break; + case XML_REGEXP_NAMECHAR: + fprintf(output, "namechar "); break; + case XML_REGEXP_NOTNAMECHAR: + fprintf(output, "notnamechar "); break; + case XML_REGEXP_DECIMAL: + fprintf(output, "decimal "); break; + case XML_REGEXP_NOTDECIMAL: + fprintf(output, "notdecimal "); break; + case XML_REGEXP_REALCHAR: + fprintf(output, "realchar "); break; + case XML_REGEXP_NOTREALCHAR: + fprintf(output, "notrealchar "); break; + case XML_REGEXP_LETTER: + fprintf(output, "LETTER "); break; + case XML_REGEXP_LETTER_UPPERCASE: + fprintf(output, "LETTER_UPPERCASE "); break; + case XML_REGEXP_LETTER_LOWERCASE: + fprintf(output, "LETTER_LOWERCASE "); break; + case XML_REGEXP_LETTER_TITLECASE: + fprintf(output, "LETTER_TITLECASE "); break; + case XML_REGEXP_LETTER_MODIFIER: + fprintf(output, "LETTER_MODIFIER "); break; + case XML_REGEXP_LETTER_OTHERS: + fprintf(output, "LETTER_OTHERS "); break; + case XML_REGEXP_MARK: + fprintf(output, "MARK "); break; + case XML_REGEXP_MARK_NONSPACING: + fprintf(output, "MARK_NONSPACING "); break; + case XML_REGEXP_MARK_SPACECOMBINING: + fprintf(output, "MARK_SPACECOMBINING "); break; + case XML_REGEXP_MARK_ENCLOSING: + fprintf(output, "MARK_ENCLOSING "); break; + case XML_REGEXP_NUMBER: + fprintf(output, "NUMBER "); break; + case XML_REGEXP_NUMBER_DECIMAL: + fprintf(output, "NUMBER_DECIMAL "); break; + case XML_REGEXP_NUMBER_LETTER: + 
fprintf(output, "NUMBER_LETTER "); break; + case XML_REGEXP_NUMBER_OTHERS: + fprintf(output, "NUMBER_OTHERS "); break; + case XML_REGEXP_PUNCT: + fprintf(output, "PUNCT "); break; + case XML_REGEXP_PUNCT_CONNECTOR: + fprintf(output, "PUNCT_CONNECTOR "); break; + case XML_REGEXP_PUNCT_DASH: + fprintf(output, "PUNCT_DASH "); break; + case XML_REGEXP_PUNCT_OPEN: + fprintf(output, "PUNCT_OPEN "); break; + case XML_REGEXP_PUNCT_CLOSE: + fprintf(output, "PUNCT_CLOSE "); break; + case XML_REGEXP_PUNCT_INITQUOTE: + fprintf(output, "PUNCT_INITQUOTE "); break; + case XML_REGEXP_PUNCT_FINQUOTE: + fprintf(output, "PUNCT_FINQUOTE "); break; + case XML_REGEXP_PUNCT_OTHERS: + fprintf(output, "PUNCT_OTHERS "); break; + case XML_REGEXP_SEPAR: + fprintf(output, "SEPAR "); break; + case XML_REGEXP_SEPAR_SPACE: + fprintf(output, "SEPAR_SPACE "); break; + case XML_REGEXP_SEPAR_LINE: + fprintf(output, "SEPAR_LINE "); break; + case XML_REGEXP_SEPAR_PARA: + fprintf(output, "SEPAR_PARA "); break; + case XML_REGEXP_SYMBOL: + fprintf(output, "SYMBOL "); break; + case XML_REGEXP_SYMBOL_MATH: + fprintf(output, "SYMBOL_MATH "); break; + case XML_REGEXP_SYMBOL_CURRENCY: + fprintf(output, "SYMBOL_CURRENCY "); break; + case XML_REGEXP_SYMBOL_MODIFIER: + fprintf(output, "SYMBOL_MODIFIER "); break; + case XML_REGEXP_SYMBOL_OTHERS: + fprintf(output, "SYMBOL_OTHERS "); break; + case XML_REGEXP_OTHER: + fprintf(output, "OTHER "); break; + case XML_REGEXP_OTHER_CONTROL: + fprintf(output, "OTHER_CONTROL "); break; + case XML_REGEXP_OTHER_FORMAT: + fprintf(output, "OTHER_FORMAT "); break; + case XML_REGEXP_OTHER_PRIVATE: + fprintf(output, "OTHER_PRIVATE "); break; + case XML_REGEXP_OTHER_NA: + fprintf(output, "OTHER_NA "); break; + case XML_REGEXP_BLOCK_NAME: + fprintf(output, "BLOCK "); break; + } +} + +static void +xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) { + switch (type) { + case XML_REGEXP_QUANT_EPSILON: + fprintf(output, "epsilon "); break; + case XML_REGEXP_QUANT_ONCE: + 
fprintf(output, "once "); break; + case XML_REGEXP_QUANT_OPT: + fprintf(output, "? "); break; + case XML_REGEXP_QUANT_MULT: + fprintf(output, "* "); break; + case XML_REGEXP_QUANT_PLUS: + fprintf(output, "+ "); break; + case XML_REGEXP_QUANT_RANGE: + fprintf(output, "range "); break; + } +} +static void +xmlRegPrintRange(FILE *output, xmlRegRangePtr range) { + fprintf(output, " range: "); + if (range->neg) + fprintf(output, "negative "); + xmlRegPrintAtomType(output, range->type); + fprintf(output, "%c - %c\n", range->start, range->end); +} + +static void +xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) { + fprintf(output, " atom: "); + if (atom == NULL) { + fprintf(output, "NULL\n"); + return; + } + xmlRegPrintAtomType(output, atom->type); + xmlRegPrintQuantType(output, atom->quant); + if (atom->quant == XML_REGEXP_QUANT_RANGE) + fprintf(output, "%d-%d ", atom->min, atom->max); + if (atom->type == XML_REGEXP_STRING) + fprintf(output, "'%s' ", (char *) atom->valuep); + if (atom->type == XML_REGEXP_CHARVAL) + fprintf(output, "char %c\n", atom->codepoint); + else if (atom->type == XML_REGEXP_RANGES) { + int i; + fprintf(output, "%d entries\n", atom->nbRanges); + for (i = 0; i < atom->nbRanges;i++) + xmlRegPrintRange(output, atom->ranges[i]); + } else if (atom->type == XML_REGEXP_SUBREG) { + fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no); + } else { + fprintf(output, "\n"); + } +} + +static void +xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) { + fprintf(output, " trans: "); + if (trans == NULL) { + fprintf(output, "NULL\n"); + return; + } + if (trans->to < 0) { + fprintf(output, "removed\n"); + return; + } + if (trans->counter >= 0) { + fprintf(output, "counted %d, ", trans->counter); + } + if (trans->count >= 0) { + fprintf(output, "count based %d, ", trans->count); + } + if (trans->atom == NULL) { + fprintf(output, "epsilon to %d\n", trans->to); + return; + } + if (trans->atom->type == XML_REGEXP_CHARVAL) + fprintf(output, "char %c ", 
trans->atom->codepoint); + fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); +} + +static void +xmlRegPrintState(FILE *output, xmlRegStatePtr state) { + int i; + + fprintf(output, " state: "); + if (state == NULL) { + fprintf(output, "NULL\n"); + return; + } + if (state->type == XML_REGEXP_START_STATE) + fprintf(output, "START "); + if (state->type == XML_REGEXP_FINAL_STATE) + fprintf(output, "FINAL "); + + fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); + for (i = 0;i < state->nbTrans; i++) { + xmlRegPrintTrans(output, &(state->trans[i])); + } +} + +#if 0 +static void +xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { + int i; + + fprintf(output, " ctxt: "); + if (ctxt == NULL) { + fprintf(output, "NULL\n"); + return; + } + fprintf(output, "'%s' ", ctxt->string); + if (ctxt->error) + fprintf(output, "error "); + if (ctxt->neg) + fprintf(output, "neg "); + fprintf(output, "\n"); + fprintf(output, "%d atoms:\n", ctxt->nbAtoms); + for (i = 0;i < ctxt->nbAtoms; i++) { + fprintf(output, " %02d ", i); + xmlRegPrintAtom(output, ctxt->atoms[i]); + } + if (ctxt->atom != NULL) { + fprintf(output, "current atom:\n"); + xmlRegPrintAtom(output, ctxt->atom); + } + fprintf(output, "%d states:", ctxt->nbStates); + if (ctxt->start != NULL) + fprintf(output, " start: %d", ctxt->start->no); + if (ctxt->end != NULL) + fprintf(output, " end: %d", ctxt->end->no); + fprintf(output, "\n"); + for (i = 0;i < ctxt->nbStates; i++) { + xmlRegPrintState(output, ctxt->states[i]); + } + fprintf(output, "%d counters:\n", ctxt->nbCounters); + for (i = 0;i < ctxt->nbCounters; i++) { + fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, + ctxt->counters[i].max); + } +} +#endif + +/************************************************************************ + * * + * Finite Automata structures manipulations * + * * + ************************************************************************/ + +static void +xmlRegAtomAddRange(xmlRegParserCtxtPtr 
ctxt, xmlRegAtomPtr atom, + int neg, xmlRegAtomType type, int start, int end, + xmlChar *blockName) { + xmlRegRangePtr range; + + if (atom == NULL) { + ERROR("add range: atom is NULL"); + return; + } + if (atom->type != XML_REGEXP_RANGES) { + ERROR("add range: atom is not ranges"); + return; + } + if (atom->maxRanges == 0) { + atom->maxRanges = 4; + atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges * + sizeof(xmlRegRangePtr)); + if (atom->ranges == NULL) { + ERROR("add range: allocation failed"); + atom->maxRanges = 0; + return; + } + } else if (atom->nbRanges >= atom->maxRanges) { + xmlRegRangePtr *tmp; + atom->maxRanges *= 2; + tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges * + sizeof(xmlRegRangePtr)); + if (tmp == NULL) { + ERROR("add range: allocation failed"); + atom->maxRanges /= 2; + return; + } + atom->ranges = tmp; + } + range = xmlRegNewRange(ctxt, neg, type, start, end); + if (range == NULL) + return; + range->blockName = blockName; + atom->ranges[atom->nbRanges++] = range; + +} + +static int +xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { + if (ctxt->maxCounters == 0) { + ctxt->maxCounters = 4; + ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * + sizeof(xmlRegCounter)); + if (ctxt->counters == NULL) { + ERROR("reg counter: allocation failed"); + ctxt->maxCounters = 0; + return(-1); + } + } else if (ctxt->nbCounters >= ctxt->maxCounters) { + xmlRegCounter *tmp; + ctxt->maxCounters *= 2; + tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters * + sizeof(xmlRegCounter)); + if (tmp == NULL) { + ERROR("reg counter: allocation failed"); + ctxt->maxCounters /= 2; + return(-1); + } + ctxt->counters = tmp; + } + ctxt->counters[ctxt->nbCounters].min = -1; + ctxt->counters[ctxt->nbCounters].max = -1; + return(ctxt->nbCounters++); +} + +static void +xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { + if (atom == NULL) { + ERROR("atom push: atom is NULL"); + return; + } + if (ctxt->maxAtoms == 0) 
{ + ctxt->maxAtoms = 4; + ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * + sizeof(xmlRegAtomPtr)); + if (ctxt->atoms == NULL) { + ERROR("atom push: allocation failed"); + ctxt->maxAtoms = 0; + return; + } + } else if (ctxt->nbAtoms >= ctxt->maxAtoms) { + xmlRegAtomPtr *tmp; + ctxt->maxAtoms *= 2; + tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms * + sizeof(xmlRegAtomPtr)); + if (tmp == NULL) { + ERROR("atom push: allocation failed"); + ctxt->maxAtoms /= 2; + return; + } + ctxt->atoms = tmp; + } + atom->no = ctxt->nbAtoms; + ctxt->atoms[ctxt->nbAtoms++] = atom; +} + +static void +xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, + xmlRegAtomPtr atom, xmlRegStatePtr target, + int counter, int count) { + if (state == NULL) { + ERROR("add state: state is NULL"); + return; + } + if (target == NULL) { + ERROR("add state: target is NULL"); + return; + } + if (state->maxTrans == 0) { + state->maxTrans = 4; + state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans * + sizeof(xmlRegTrans)); + if (state->trans == NULL) { + ERROR("add range: allocation failed"); + state->maxTrans = 0; + return; + } + } else if (state->nbTrans >= state->maxTrans) { + xmlRegTrans *tmp; + state->maxTrans *= 2; + tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans * + sizeof(xmlRegTrans)); + if (tmp == NULL) { + ERROR("add range: allocation failed"); + state->maxTrans /= 2; + return; + } + state->trans = tmp; + } +#ifdef DEBUG_REGEXP_GRAPH + printf("Add trans from %d to %d ", state->no, target->no); + if (count >= 0) + printf("count based %d", count); + else if (counter >= 0) + printf("counted %d", counter); + else if (atom == NULL) + printf("epsilon transition"); + printf("\n"); +#endif + + state->trans[state->nbTrans].atom = atom; + state->trans[state->nbTrans].to = target->no; + state->trans[state->nbTrans].counter = counter; + state->trans[state->nbTrans].count = count; + state->nbTrans++; +} + +static void 
+xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { + if (ctxt->maxStates == 0) { + ctxt->maxStates = 4; + ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates * + sizeof(xmlRegStatePtr)); + if (ctxt->states == NULL) { + ERROR("add range: allocation failed"); + ctxt->maxStates = 0; + return; + } + } else if (ctxt->nbStates >= ctxt->maxStates) { + xmlRegStatePtr *tmp; + ctxt->maxStates *= 2; + tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates * + sizeof(xmlRegStatePtr)); + if (tmp == NULL) { + ERROR("add range: allocation failed"); + ctxt->maxStates /= 2; + return; + } + ctxt->states = tmp; + } + state->no = ctxt->nbStates; + ctxt->states[ctxt->nbStates++] = state; +} + +/** + * xmlFAGenerateEpsilonTransition: + * ctxt: a regexp parser context + * from: the from state + * to: the target state or NULL for building a new one + * + */ +static void +xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1); +} + +/** + * xmlFAGenerateCountedEpsilonTransition: + * ctxt: a regexp parser context + * from: the from state + * to: the target state or NULL for building a new one + * counter: the counter for that transition + * + */ +static void +xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to, int counter) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1); +} + +/** + * xmlFAGenerateCountedTransition: + * ctxt: a regexp parser context + * from: the from state + * to: the target state or NULL for building a new one + * counter: the counter for that transition + * + */ +static void +xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, + xmlRegStatePtr from, xmlRegStatePtr to, int 
counter) { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + ctxt->state = to; + } + xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter); +} + +/** + * xmlFAGenerateTransitions: + * ctxt: a regexp parser context + * from: the from state + * to: the target state or NULL for building a new one + * atom: the atom generating the transition + * + */ +static void +xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, + xmlRegStatePtr to, xmlRegAtomPtr atom) { + if (atom == NULL) { + ERROR("genrate transition: atom == NULL"); + return; + } + if (atom->type == XML_REGEXP_SUBREG) { + /* + * this is a subexpression handling one should not need to + * create a new node excep for XML_REGEXP_QUANT_RANGE. + */ + xmlRegAtomPush(ctxt, atom); + if ((to != NULL) && (atom->stop != to) && + (atom->quant != XML_REGEXP_QUANT_RANGE)) { + /* + * Generate an epsilon transition to link to the target + */ + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); + } + switch (atom->quant) { + case XML_REGEXP_QUANT_OPT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); + break; + case XML_REGEXP_QUANT_MULT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); + break; + case XML_REGEXP_QUANT_PLUS: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); + break; + case XML_REGEXP_QUANT_RANGE: { + int counter; + xmlRegStatePtr newstate; + + /* + * This one is nasty: + * 1/ register a new counter + * 2/ register an epsilon transition associated to + * this counter going from atom->stop to atom->start + * 3/ create a new state + * 4/ generate a counted transition from atom->stop to + * that state + */ + counter = xmlRegGetCounter(ctxt); + ctxt->counters[counter].min = atom->min - 1; + ctxt->counters[counter].max = atom->max - 1; + 
atom->min = 0; + atom->max = 0; + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, + atom->start, counter); + if (to != NULL) { + newstate = to; + } else { + newstate = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, newstate); + ctxt->state = newstate; + } + xmlFAGenerateCountedTransition(ctxt, atom->stop, + newstate, counter); + } + default: + break; + } + return; + } else { + if (to == NULL) { + to = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, to); + } + xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); + xmlRegAtomPush(ctxt, atom); + ctxt->state = to; + } + switch (atom->quant) { + case XML_REGEXP_QUANT_OPT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, from, to); + break; + case XML_REGEXP_QUANT_MULT: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlFAGenerateEpsilonTransition(ctxt, from, to); + xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); + break; + case XML_REGEXP_QUANT_PLUS: + atom->quant = XML_REGEXP_QUANT_ONCE; + xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); + break; + default: + break; + } +} + +/** + * xmlFAReduceEpsilonTransitions: + * ctxt: a regexp parser context + * @fromnr: the from state + * @tonr: the to state + * @cpunter: should that transition be associted to a counted + * + */ +static void +xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, + int tonr, int counter) { + int transnr; + xmlRegStatePtr from; + xmlRegStatePtr to; + +#ifdef DEBUG_REGEXP_GRAPH + printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr); +#endif + from = ctxt->states[fromnr]; + if (from == NULL) + return; + to = ctxt->states[tonr]; + if (to == NULL) + return; + if ((to->mark == XML_REGEXP_MARK_START) || + (to->mark == XML_REGEXP_MARK_VISITED)) + return; + + to->mark = XML_REGEXP_MARK_VISITED; + if (to->type == XML_REGEXP_FINAL_STATE) { +#ifdef DEBUG_REGEXP_GRAPH + printf("State %d is final, so %d becomes final\n", tonr, fromnr); +#endif + from->type = 
XML_REGEXP_FINAL_STATE; + } + for (transnr = 0;transnr < to->nbTrans;transnr++) { + if (to->trans[transnr].atom == NULL) { + /* + * Don't remove counted transitions + * Don't loop either + */ + if ((to->trans[transnr].count < 0) && + (to->trans[transnr].to != fromnr)) { +#ifdef DEBUG_REGEXP_GRAPH + printf("Found epsilon trans %d from %d to %d\n", + transnr, tonr, to->trans[transnr].to); +#endif + xmlFAReduceEpsilonTransitions(ctxt, fromnr, + to->trans[transnr].to, counter); + } + } else { + int newto = to->trans[transnr].to; + + xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, + ctxt->states[newto], counter, -1); + } + } + to->mark = XML_REGEXP_MARK_NORMAL; +} + +/** + * xmlFAEliminateEpsilonTransitions: + * ctxt: a regexp parser context + * + */ +static void +xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { + int statenr, transnr; + xmlRegStatePtr state; + + /* + * build the completed transitions bypassing the epsilons + * Use a marking algorithm to avoid loops + */ + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if (state == NULL) + continue; + for (transnr = 0;transnr < state->nbTrans;transnr++) { + if ((state->trans[transnr].atom == NULL) && + (state->trans[transnr].to >= 0)) { + if (state->trans[transnr].to == statenr) { + state->trans[transnr].to = -1; +#ifdef DEBUG_REGEXP_GRAPH + printf("Removed loopback epsilon trans %d on %d\n", + transnr, statenr); +#endif + } else if (state->trans[transnr].count < 0) { + int newto = state->trans[transnr].to; + +#ifdef DEBUG_REGEXP_GRAPH + printf("Found epsilon trans %d from %d to %d\n", + transnr, statenr, newto); +#endif + state->mark = XML_REGEXP_MARK_START; + xmlFAReduceEpsilonTransitions(ctxt, statenr, + newto, state->trans[transnr].counter); + state->mark = XML_REGEXP_MARK_NORMAL; +#ifdef DEBUG_REGEXP_GRAPH + } else { + printf("Found counted transition %d on %d\n", + transnr, statenr); +#endif + } + } + } + } + /* + * Eliminate the epsilon transitions + 
*/ + for (statenr = 0;statenr < ctxt->nbStates;statenr++) { + state = ctxt->states[statenr]; + if (state == NULL) + continue; + for (transnr = 0;transnr < state->nbTrans;transnr++) { + if ((state->trans[transnr].atom == NULL) && + (state->trans[transnr].count < 0) && + (state->trans[transnr].to >= 0)) { + state->trans[transnr].to = -1; + } + } + } +} + +/************************************************************************ + * * + * Routines to check input against transition atoms * + * * + ************************************************************************/ + +static int +xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, + int start, int end, const xmlChar *blockName) { + int ret = 0; + + switch (type) { + case XML_REGEXP_STRING: + case XML_REGEXP_SUBREG: + case XML_REGEXP_RANGES: + case XML_REGEXP_EPSILON: + return(-1); + case XML_REGEXP_ANYCHAR: + ret = ((codepoint != '\n') && (codepoint != '\r')); + break; + case XML_REGEXP_CHARVAL: + ret = ((codepoint >= start) && (codepoint <= end)); + break; + case XML_REGEXP_NOTSPACE: + neg = !neg; + case XML_REGEXP_ANYSPACE: + ret = ((codepoint == '\n') || (codepoint == '\r') || + (codepoint == '\t') || (codepoint == ' ')); + break; + case XML_REGEXP_NOTINITNAME: + neg = !neg; + case XML_REGEXP_INITNAME: + ret = (xmlIsLetter(codepoint) || + (codepoint == '_') || (codepoint == ':')); + break; + case XML_REGEXP_NOTNAMECHAR: + neg = !neg; + case XML_REGEXP_NAMECHAR: + ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) || + (codepoint == '.') || (codepoint == '-') || + (codepoint == '_') || (codepoint == ':') || + xmlIsCombining(codepoint) || xmlIsExtender(codepoint)); + break; + case XML_REGEXP_NOTDECIMAL: + neg = !neg; + case XML_REGEXP_DECIMAL: + ret = xmlUCSIsCatNd(codepoint); + break; + case XML_REGEXP_REALCHAR: + neg = !neg; + case XML_REGEXP_NOTREALCHAR: + ret = xmlUCSIsCatP(codepoint); + if (ret == 0) + ret = xmlUCSIsCatZ(codepoint); + if (ret == 0) + ret = xmlUCSIsCatC(codepoint); + 
break; + case XML_REGEXP_LETTER: + ret = xmlUCSIsCatL(codepoint); + break; + case XML_REGEXP_LETTER_UPPERCASE: + ret = xmlUCSIsCatLu(codepoint); + break; + case XML_REGEXP_LETTER_LOWERCASE: + ret = xmlUCSIsCatLl(codepoint); + break; + case XML_REGEXP_LETTER_TITLECASE: + ret = xmlUCSIsCatLt(codepoint); + break; + case XML_REGEXP_LETTER_MODIFIER: + ret = xmlUCSIsCatLm(codepoint); + break; + case XML_REGEXP_LETTER_OTHERS: + ret = xmlUCSIsCatLo(codepoint); + break; + case XML_REGEXP_MARK: + ret = xmlUCSIsCatM(codepoint); + break; + case XML_REGEXP_MARK_NONSPACING: + ret = xmlUCSIsCatMn(codepoint); + break; + case XML_REGEXP_MARK_SPACECOMBINING: + ret = xmlUCSIsCatMc(codepoint); + break; + case XML_REGEXP_MARK_ENCLOSING: + ret = xmlUCSIsCatMe(codepoint); + break; + case XML_REGEXP_NUMBER: + ret = xmlUCSIsCatN(codepoint); + break; + case XML_REGEXP_NUMBER_DECIMAL: + ret = xmlUCSIsCatNd(codepoint); + break; + case XML_REGEXP_NUMBER_LETTER: + ret = xmlUCSIsCatNl(codepoint); + break; + case XML_REGEXP_NUMBER_OTHERS: + ret = xmlUCSIsCatNo(codepoint); + break; + case XML_REGEXP_PUNCT: + ret = xmlUCSIsCatP(codepoint); + break; + case XML_REGEXP_PUNCT_CONNECTOR: + ret = xmlUCSIsCatPc(codepoint); + break; + case XML_REGEXP_PUNCT_DASH: + ret = xmlUCSIsCatPd(codepoint); + break; + case XML_REGEXP_PUNCT_OPEN: + ret = xmlUCSIsCatPs(codepoint); + break; + case XML_REGEXP_PUNCT_CLOSE: + ret = xmlUCSIsCatPe(codepoint); + break; + case XML_REGEXP_PUNCT_INITQUOTE: + ret = xmlUCSIsCatPi(codepoint); + break; + case XML_REGEXP_PUNCT_FINQUOTE: + ret = xmlUCSIsCatPf(codepoint); + break; + case XML_REGEXP_PUNCT_OTHERS: + ret = xmlUCSIsCatPo(codepoint); + break; + case XML_REGEXP_SEPAR: + ret = xmlUCSIsCatZ(codepoint); + break; + case XML_REGEXP_SEPAR_SPACE: + ret = xmlUCSIsCatZs(codepoint); + break; + case XML_REGEXP_SEPAR_LINE: + ret = xmlUCSIsCatZl(codepoint); + break; + case XML_REGEXP_SEPAR_PARA: + ret = xmlUCSIsCatZp(codepoint); + break; + case XML_REGEXP_SYMBOL: + ret = 
xmlUCSIsCatS(codepoint); + break; + case XML_REGEXP_SYMBOL_MATH: + ret = xmlUCSIsCatSm(codepoint); + break; + case XML_REGEXP_SYMBOL_CURRENCY: + ret = xmlUCSIsCatSc(codepoint); + break; + case XML_REGEXP_SYMBOL_MODIFIER: + ret = xmlUCSIsCatSk(codepoint); + break; + case XML_REGEXP_SYMBOL_OTHERS: + ret = xmlUCSIsCatSo(codepoint); + break; + case XML_REGEXP_OTHER: + ret = xmlUCSIsCatC(codepoint); + break; + case XML_REGEXP_OTHER_CONTROL: + ret = xmlUCSIsCatCc(codepoint); + break; + case XML_REGEXP_OTHER_FORMAT: + ret = xmlUCSIsCatCf(codepoint); + break; + case XML_REGEXP_OTHER_PRIVATE: + ret = xmlUCSIsCatCo(codepoint); + break; + case XML_REGEXP_OTHER_NA: + /* ret = xmlUCSIsCatCn(codepoint); */ + /* Seems it doesn't exist anymore in recent Unicode releases */ + ret = 0; + break; + case XML_REGEXP_BLOCK_NAME: + ret = xmlUCSIsBlock(codepoint, (const char *) blockName); + break; + } + if (neg) + return(!ret); + return(ret); +} + +static int +xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) { + int i, ret = 0; + xmlRegRangePtr range; + + if ((atom == NULL) || (!xmlIsChar(codepoint))) + return(-1); + + switch (atom->type) { + case XML_REGEXP_SUBREG: + case XML_REGEXP_EPSILON: + return(-1); + case XML_REGEXP_CHARVAL: + return(codepoint == atom->codepoint); + case XML_REGEXP_RANGES: { + int accept = 0; + for (i = 0;i < atom->nbRanges;i++) { + range = atom->ranges[i]; + if (range->neg) { + ret = xmlRegCheckCharacterRange(range->type, codepoint, + 0, range->start, range->end, + range->blockName); + if (ret != 0) + return(0); /* excluded char */ + } else { + ret = xmlRegCheckCharacterRange(range->type, codepoint, + 0, range->start, range->end, + range->blockName); + if (ret != 0) + accept = 1; /* might still be excluded */ + } + } + return(accept); + } + case XML_REGEXP_STRING: + printf("TODO: XML_REGEXP_STRING\n"); + return(-1); + case XML_REGEXP_ANYCHAR: + case XML_REGEXP_ANYSPACE: + case XML_REGEXP_NOTSPACE: + case XML_REGEXP_INITNAME: + case XML_REGEXP_NOTINITNAME: 
+ case XML_REGEXP_NAMECHAR: + case XML_REGEXP_NOTNAMECHAR: + case XML_REGEXP_DECIMAL: + case XML_REGEXP_NOTDECIMAL: + case XML_REGEXP_REALCHAR: + case XML_REGEXP_NOTREALCHAR: + case XML_REGEXP_LETTER: + case XML_REGEXP_LETTER_UPPERCASE: + case XML_REGEXP_LETTER_LOWERCASE: + case XML_REGEXP_LETTER_TITLECASE: + case XML_REGEXP_LETTER_MODIFIER: + case XML_REGEXP_LETTER_OTHERS: + case XML_REGEXP_MARK: + case XML_REGEXP_MARK_NONSPACING: + case XML_REGEXP_MARK_SPACECOMBINING: + case XML_REGEXP_MARK_ENCLOSING: + case XML_REGEXP_NUMBER: + case XML_REGEXP_NUMBER_DECIMAL: + case XML_REGEXP_NUMBER_LETTER: + case XML_REGEXP_NUMBER_OTHERS: + case XML_REGEXP_PUNCT: + case XML_REGEXP_PUNCT_CONNECTOR: + case XML_REGEXP_PUNCT_DASH: + case XML_REGEXP_PUNCT_OPEN: + case XML_REGEXP_PUNCT_CLOSE: + case XML_REGEXP_PUNCT_INITQUOTE: + case XML_REGEXP_PUNCT_FINQUOTE: + case XML_REGEXP_PUNCT_OTHERS: + case XML_REGEXP_SEPAR: + case XML_REGEXP_SEPAR_SPACE: + case XML_REGEXP_SEPAR_LINE: + case XML_REGEXP_SEPAR_PARA: + case XML_REGEXP_SYMBOL: + case XML_REGEXP_SYMBOL_MATH: + case XML_REGEXP_SYMBOL_CURRENCY: + case XML_REGEXP_SYMBOL_MODIFIER: + case XML_REGEXP_SYMBOL_OTHERS: + case XML_REGEXP_OTHER: + case XML_REGEXP_OTHER_CONTROL: + case XML_REGEXP_OTHER_FORMAT: + case XML_REGEXP_OTHER_PRIVATE: + case XML_REGEXP_OTHER_NA: + case XML_REGEXP_BLOCK_NAME: + ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, + (const xmlChar *)atom->valuep); + if (atom->neg) + ret = !ret; + break; + } + return(ret); +} + +/************************************************************************ + * * + * Saving an restoring state of an execution context * + * * + ************************************************************************/ + +#ifdef DEBUG_REGEXP_EXEC +static void +xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { + printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); + if (exec->inputStack != NULL) { + int i; + printf(": "); + for (i = 0;(i < 3) && (i < 
exec->inputStackNr);i++) + printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]); + } else { + printf(": %s", &(exec->inputString[exec->index])); + } + printf("\n"); +} +#endif + +static void +xmlFARegExecSave(xmlRegExecCtxtPtr exec) { +#ifdef DEBUG_REGEXP_EXEC + printf("saving "); + exec->transno++; + xmlFARegDebugExec(exec); + exec->transno--; +#endif + + if (exec->maxRollbacks == 0) { + exec->maxRollbacks = 4; + exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks * + sizeof(xmlRegExecRollback)); + if (exec->rollbacks == NULL) { + fprintf(stderr, "exec save: allocation failed"); + exec->maxRollbacks = 0; + return; + } + memset(exec->rollbacks, 0, + exec->maxRollbacks * sizeof(xmlRegExecRollback)); + } else if (exec->nbRollbacks >= exec->maxRollbacks) { + xmlRegExecRollback *tmp; + int len = exec->maxRollbacks; + + exec->maxRollbacks *= 2; + tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks, + exec->maxRollbacks * sizeof(xmlRegExecRollback)); + if (tmp == NULL) { + fprintf(stderr, "exec save: allocation failed"); + exec->maxRollbacks /= 2; + return; + } + exec->rollbacks = tmp; + tmp = &exec->rollbacks[len]; + memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback)); + } + exec->rollbacks[exec->nbRollbacks].state = exec->state; + exec->rollbacks[exec->nbRollbacks].index = exec->index; + exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; + if (exec->comp->nbCounters > 0) { + if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { + exec->rollbacks[exec->nbRollbacks].counts = (int *) + xmlMalloc(exec->comp->nbCounters * sizeof(int)); + if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { + fprintf(stderr, "exec save: allocation failed"); + exec->status = -5; + return; + } + } + memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts, + exec->comp->nbCounters * sizeof(int)); + } + exec->nbRollbacks++; +} + +static void +xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { + if (exec->nbRollbacks <= 
/*
 * xmlFARegExec:
 * @comp:  the compiled regexp
 * @content:  the zero terminated string to check
 *
 * Run @content through the automaton of @comp, starting from
 * comp->states[0]. Alternatives are recorded with xmlFARegExecSave()
 * and retried through xmlFARegExecRollBack() when a path fails.
 *
 * Returns 1 when the loop ends with status 0, 0 when it ends with
 * status -1 (set by xmlFARegExecRollBack() on an empty rollback stack),
 * the negative status otherwise, and -1 if the counters array cannot
 * be allocated.
 */
static int
xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
    xmlRegExecCtxt execval;
    xmlRegExecCtxtPtr exec = &execval;
    int ret, codepoint, len;

    /* the context lives on the stack, only the fields used here are set */
    exec->inputString = content;
    exec->index = 0;
    exec->determinist = 1;
    exec->maxRollbacks = 0;
    exec->nbRollbacks = 0;
    exec->rollbacks = NULL;
    exec->status = 0;
    exec->comp = comp;
    exec->state = comp->states[0];
    exec->transno = 0;
    exec->transcount = 0;
    if (comp->nbCounters > 0) {
        exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
        if (exec->counts == NULL)
            return(-1);
        memset(exec->counts, 0, comp->nbCounters * sizeof(int));
    } else
        exec->counts = NULL;
    /* loop until an error/failure status, or end of input on a final state */
    while ((exec->status == 0) &&
           ((exec->inputString[exec->index] != 0) ||
            (exec->state->type != XML_REGEXP_FINAL_STATE))) {
        xmlRegTransPtr trans;
        xmlRegAtomPtr atom;

        /*
         * End of input on non-terminal state, rollback, however we may
         * still have epsilon like transition for counted transitions
         * on counters, in that case don't break too early.
         */
        if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
            goto rollback;

        exec->transcount = 0;
        /* resume at exec->transno, which a rollback may have set past 0 */
        for (;exec->transno < exec->state->nbTrans;exec->transno++) {
            trans = &exec->state->trans[exec->transno];
            if (trans->to < 0)
                continue;
            atom = trans->atom;
            ret = 0;
            if (trans->count >= 0) {
                int count;
                xmlRegCounterPtr counter;

                /*
                 * A counted transition.
                 */

                count = exec->counts[trans->count];
                counter = &exec->comp->counters[trans->count];
#ifdef DEBUG_REGEXP_EXEC
                printf("testing count %d: val %d, min %d, max %d\n",
                       trans->count, count, counter->min, counter->max);
#endif
                ret = ((count >= counter->min) && (count <= counter->max));
            } else if (atom == NULL) {
                fprintf(stderr, "epsilon transition left at runtime\n");
                exec->status = -2;
                break;
            } else if (exec->inputString[exec->index] != 0) {
                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
                ret = xmlRegCheckCharacter(atom, codepoint);
                if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
                    xmlRegStatePtr to = comp->states[trans->to];

                    /*
                     * this is a multiple input sequence
                     */
                    if (exec->state->nbTrans > exec->transno + 1) {
                        xmlFARegExecSave(exec);
                    }
                    exec->transcount = 1;
                    do {
                        /*
                         * Try to progress as much as possible on the input
                         */
                        if (exec->transcount == atom->max) {
                            break;
                        }
                        exec->index += len;
                        /*
                         * End of input: stop here
                         */
                        if (exec->inputString[exec->index] == 0) {
                            exec->index -= len;
                            break;
                        }
                        if (exec->transcount >= atom->min) {
                            int transno = exec->transno;
                            xmlRegStatePtr state = exec->state;

                            /*
                             * The transition is acceptable save it
                             */
                            /*
                             * xmlFARegExecSave() records transno + 1 as the
                             * branch to resume from, so -1 makes the saved
                             * point restart state 'to' at transition 0.
                             */
                            exec->transno = -1; /* trick */
                            exec->state = to;
                            xmlFARegExecSave(exec);
                            exec->transno = transno;
                            exec->state = state;
                        }
                        codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
                                              len);
                        ret = xmlRegCheckCharacter(atom, codepoint);
                        exec->transcount++;
                    } while (ret == 1);
                    if (exec->transcount < atom->min)
                        ret = 0;

                    /*
                     * If the last check failed but one transition was found
                     * possible, rollback
                     */
                    if (ret < 0)
                        ret = 0;
                    if (ret == 0) {
                        goto rollback;
                    }
                }
            }
            if (ret == 1) {
                /* other transitions remain on this state: keep a way back */
                if (exec->state->nbTrans > exec->transno + 1) {
                    xmlFARegExecSave(exec);
                }
                if (trans->counter >= 0) {
#ifdef DEBUG_REGEXP_EXEC
                    printf("Increasing count %d\n", trans->counter);
#endif
                    exec->counts[trans->counter]++;
                }
#ifdef DEBUG_REGEXP_EXEC
                printf("entering state %d\n", trans->to);
#endif
                exec->state = comp->states[trans->to];
                exec->transno = 0;
                /*
                 * NOTE(review): len is only assigned by CUR_SCHAR in the
                 * atom branch above; a counted transition which also
                 * carried an atom would reach this with a stale or unset
                 * len - confirm counted transitions never have an atom.
                 */
                if (trans->atom != NULL) {
                    exec->index += len;
                }
                goto progress;
            } else if (ret < 0) {
                exec->status = -4;
                break;
            }
        }
        if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
            /*
             * Failed to find a way out
             */
            exec->determinist = 0;
            xmlFARegExecRollBack(exec);
        }
progress:
        continue;
    }
    if (exec->rollbacks != NULL) {
        if (exec->counts != NULL) {
            int i;

            /*
             * walk the whole allocated stack, not only the used part:
             * xmlFARegExecSave() zero-fills new entries and keeps the
             * counts buffer of popped entries for reuse.
             */
            for (i = 0;i < exec->maxRollbacks;i++)
                if (exec->rollbacks[i].counts != NULL)
                    xmlFree(exec->rollbacks[i].counts);
        }
        xmlFree(exec->rollbacks);
    }
    if (exec->counts != NULL)
        xmlFree(exec->counts);
    if (exec->status == 0)
        return(1);
    if (exec->status == -1)
        return(0);
    return(exec->status);
}
+ */ +xmlRegExecCtxtPtr +xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) { + xmlRegExecCtxtPtr exec; + + if (comp == NULL) + return(NULL); + exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt)); + if (exec == NULL) { + return(NULL); + } + memset(exec, 0, sizeof(xmlRegExecCtxt)); + exec->inputString = NULL; + exec->index = 0; + exec->determinist = 1; + exec->maxRollbacks = 0; + exec->nbRollbacks = 0; + exec->rollbacks = NULL; + exec->status = 0; + exec->comp = comp; + exec->state = comp->states[0]; + exec->transno = 0; + exec->transcount = 0; + exec->callback = callback; + exec->data = data; + if (comp->nbCounters > 0) { + exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); + if (exec->counts == NULL) { + xmlFree(exec); + return(NULL); + } + memset(exec->counts, 0, comp->nbCounters * sizeof(int)); + } else + exec->counts = NULL; + exec->inputStackMax = 0; + exec->inputStackNr = 0; + exec->inputStack = NULL; + return(exec); +} + +/** + * xmlRegFreeExecCtxt: + * @exec: a regular expression evaulation context + * + * Free the structures associated to a regular expression evaulation context. 
+ */ +void +xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) { + if (exec == NULL) + return; + + if (exec->rollbacks != NULL) { + if (exec->counts != NULL) { + int i; + + for (i = 0;i < exec->maxRollbacks;i++) + if (exec->rollbacks[i].counts != NULL) + xmlFree(exec->rollbacks[i].counts); + } + xmlFree(exec->rollbacks); + } + if (exec->counts != NULL) + xmlFree(exec->counts); + if (exec->inputStack != NULL) { + int i; + + for (i = 0;i < exec->inputStackNr;i++) + xmlFree(exec->inputStack[i].value); + xmlFree(exec->inputStack); + } + xmlFree(exec); +} + +static void +xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, + void *data) { +#ifdef DEBUG_PUSH + printf("saving value: %d:%s\n", exec->inputStackNr, value); +#endif + if (exec->inputStackMax == 0) { + exec->inputStackMax = 4; + exec->inputStack = (xmlRegInputTokenPtr) + xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); + if (exec->inputStack == NULL) { + fprintf(stderr, "push input: allocation failed"); + exec->inputStackMax = 0; + return; + } + } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { + xmlRegInputTokenPtr tmp; + + exec->inputStackMax *= 2; + tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, + exec->inputStackMax * sizeof(xmlRegInputToken)); + if (tmp == NULL) { + fprintf(stderr, "push input: allocation failed"); + exec->inputStackMax /= 2; + return; + } + exec->inputStack = tmp; + } + exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); + exec->inputStack[exec->inputStackNr].data = data; + exec->inputStackNr++; + exec->inputStack[exec->inputStackNr].value = NULL; + exec->inputStack[exec->inputStackNr].data = NULL; +} + + +/** + * xmlRegExecPushString: + * @exec: a regexp execution context + * @value: a string token input + * @data: data associated to the token to reuse in callbacks + * + * Push one input token in the execution context + * + * Returns: 1 if the regexp reached a final state, 0 if non-final, and + * a negative value in case of error. 
/**
 * xmlRegExecPushString:
 * @exec: a regexp execution context
 * @value: a string token input
 * @data: data associated to the token to reuse in callbacks
 *
 * Push one input token in the execution context. A NULL @value means
 * end of input: the automaton then has to reach a final state, possibly
 * by rolling back to saved alternatives.
 *
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 *     a negative value in case of error (-1 for a NULL @exec, the stored
 *     status otherwise).
 */
int
xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
                     void *data) {
    xmlRegTransPtr trans;
    xmlRegAtomPtr atom;
    int ret;
    int final = 0;

    if (exec == NULL)
        return(-1);
    /* a previous failure or error is sticky */
    if (exec->status != 0)
        return(exec->status);

    if (value == NULL) {
        if (exec->state->type == XML_REGEXP_FINAL_STATE)
            return(1);
        final = 1;
    }

#ifdef DEBUG_PUSH
    printf("value pushed: %s\n", value);
#endif
    /*
     * If we have an active rollback stack push the new value there
     * and get back to where we were left
     */
    if ((value != NULL) && (exec->inputStackNr > 0)) {
        xmlFARegExecSaveInputString(exec, value, data);
        value = exec->inputStack[exec->index].value;
        data = exec->inputStack[exec->index].data;
#ifdef DEBUG_PUSH
        printf("value loaded: %s\n", value);
#endif
    }

    /*
     * keep going while there is a token to consume, or while the end of
     * input was signalled and the current state is not final
     */
    while ((exec->status == 0) &&
           ((value != NULL) ||
            ((final == 1) &&
             (exec->state->type != XML_REGEXP_FINAL_STATE)))) {

        /*
         * End of input on non-terminal state, rollback, however we may
         * still have epsilon like transition for counted transitions
         * on counters, in that case don't break too early.
         */
        if (value == NULL)
            goto rollback;

        exec->transcount = 0;
        for (;exec->transno < exec->state->nbTrans;exec->transno++) {
            trans = &exec->state->trans[exec->transno];
            if (trans->to < 0)
                continue;
            atom = trans->atom;
            ret = 0;
            if (trans->count >= 0) {
                int count;
                xmlRegCounterPtr counter;

                /*
                 * A counted transition.
                 */

                count = exec->counts[trans->count];
                counter = &exec->comp->counters[trans->count];
#ifdef DEBUG_PUSH
                printf("testing count %d: val %d, min %d, max %d\n",
                       trans->count, count, counter->min, counter->max);
#endif
                ret = ((count >= counter->min) && (count <= counter->max));
            } else if (atom == NULL) {
                fprintf(stderr, "epsilon transition left at runtime\n");
                exec->status = -2;
                break;
            } else if (value != NULL) {
                /* tokens are matched by plain string equality */
                ret = xmlStrEqual(value, atom->valuep);
                if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
                    xmlRegStatePtr to = exec->comp->states[trans->to];

                    /*
                     * this is a multiple input sequence
                     */
                    if (exec->state->nbTrans > exec->transno + 1) {
                        if (exec->inputStackNr <= 0) {
                            xmlFARegExecSaveInputString(exec, value, data);
                        }
                        xmlFARegExecSave(exec);
                    }
                    exec->transcount = 1;
                    do {
                        /*
                         * Try to progress as much as possible on the input
                         */
                        if (exec->transcount == atom->max) {
                            break;
                        }
                        /*
                         * NOTE(review): the input stack is only filled
                         * above when the state has further transitions;
                         * with a single transition and no earlier save,
                         * exec->inputStack looks like it can still be
                         * NULL here - confirm against the callers.
                         */
                        exec->index++;
                        value = exec->inputStack[exec->index].value;
                        data = exec->inputStack[exec->index].data;
#ifdef DEBUG_PUSH
                        printf("value loaded: %s\n", value);
#endif

                        /*
                         * End of input: stop here
                         */
                        if (value == NULL) {
                            exec->index --;
                            break;
                        }
                        if (exec->transcount >= atom->min) {
                            int transno = exec->transno;
                            xmlRegStatePtr state = exec->state;

                            /*
                             * The transition is acceptable save it
                             */
                            /*
                             * xmlFARegExecSave() records transno + 1, so
                             * -1 makes the saved point restart state 'to'
                             * at its first transition.
                             */
                            exec->transno = -1; /* trick */
                            exec->state = to;
                            if (exec->inputStackNr <= 0) {
                                xmlFARegExecSaveInputString(exec, value, data);
                            }
                            xmlFARegExecSave(exec);
                            exec->transno = transno;
                            exec->state = state;
                        }
                        ret = xmlStrEqual(value, atom->valuep);
                        exec->transcount++;
                    } while (ret == 1);
                    if (exec->transcount < atom->min)
                        ret = 0;

                    /*
                     * If the last check failed but one transition was found
                     * possible, rollback
                     */
                    if (ret < 0)
                        ret = 0;
                    if (ret == 0) {
                        goto rollback;
                    }
                }
            }
            if (ret == 1) {
                /* report the matched atom together with the token data */
                if ((exec->callback != NULL) && (atom != NULL)) {
                    exec->callback(exec->data, atom->valuep,
                                   atom->data, data);
                }
                /*
                 * alternatives remain: remember the token (if it is the
                 * first one to be kept) and the place to come back to
                 */
                if (exec->state->nbTrans > exec->transno + 1) {
                    if (exec->inputStackNr <= 0) {
                        xmlFARegExecSaveInputString(exec, value, data);
                    }
                    xmlFARegExecSave(exec);
                }
                if (trans->counter >= 0) {
#ifdef DEBUG_PUSH
                    printf("Increasing count %d\n", trans->counter);
#endif
                    exec->counts[trans->counter]++;
                }
#ifdef DEBUG_PUSH
                printf("entering state %d\n", trans->to);
#endif
                exec->state = exec->comp->states[trans->to];
                exec->transno = 0;
                if (trans->atom != NULL) {
                    /*
                     * the token was consumed: take the next saved one if
                     * tokens are being stacked, else wait for the next push
                     */
                    if (exec->inputStack != NULL) {
                        exec->index++;
                        if (exec->index < exec->inputStackNr) {
                            value = exec->inputStack[exec->index].value;
                            data = exec->inputStack[exec->index].data;
#ifdef DEBUG_PUSH
                            printf("value loaded: %s\n", value);
#endif
                        } else {
                            value = NULL;
                            data = NULL;
#ifdef DEBUG_PUSH
                            printf("end of input\n");
#endif
                        }
                    } else {
                        value = NULL;
                        data = NULL;
#ifdef DEBUG_PUSH
                        printf("end of input\n");
#endif
                    }
                }
                goto progress;
            } else if (ret < 0) {
                exec->status = -4;
                break;
            }
        }
        if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
            /*
             * Failed to find a way out
             */
            exec->determinist = 0;
            xmlFARegExecRollBack(exec);
            /* on success reload the token at the restored input index */
            if (exec->status == 0) {
                value = exec->inputStack[exec->index].value;
                data = exec->inputStack[exec->index].data;
#ifdef DEBUG_PUSH
                printf("value loaded: %s\n", value);
#endif
            }
        }
progress:
        continue;
    }
    if (exec->status == 0) {
        return(exec->state->type == XML_REGEXP_FINAL_STATE);
    }
    return(exec->status);
}
rollback, however we may + * still have epsilon like transition for counted transitions + * on counters, in that case don't break too early. + */ + if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) + goto rollback; + + exec->transcount = 0; + for (;exec->transno < exec->state->nbTrans;exec->transno++) { + trans = &exec->state->trans[exec->transno]; + if (trans->to < 0) + continue; + atom = trans->atom; + ret = 0; + if (trans->count >= 0) { + int count; + xmlRegCounterPtr counter; + + /* + * A counted transition. + */ + + count = exec->counts[trans->count]; + counter = &exec->comp->counters[trans->count]; +#ifdef DEBUG_REGEXP_EXEC + printf("testing count %d: val %d, min %d, max %d\n", + trans->count, count, counter->min, counter->max); +#endif + ret = ((count >= counter->min) && (count <= counter->max)); + } else if (atom == NULL) { + fprintf(stderr, "epsilon transition left at runtime\n"); + exec->status = -2; + break; + } else if (exec->inputString[exec->index] != 0) { + codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); + ret = xmlRegCheckCharacter(atom, codepoint); + if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { + xmlRegStatePtr to = exec->comp->states[trans->to]; + + /* + * this is a multiple input sequence + */ + if (exec->state->nbTrans > exec->transno + 1) { + xmlFARegExecSave(exec); + } + exec->transcount = 1; + do { + /* + * Try to progress as much as possible on the input + */ + if (exec->transcount == atom->max) { + break; + } + exec->index += len; + /* + * End of input: stop here + */ + if (exec->inputString[exec->index] == 0) { + exec->index -= len; + break; + } + if (exec->transcount >= atom->min) { + int transno = exec->transno; + xmlRegStatePtr state = exec->state; + + /* + * The transition is acceptable save it + */ + exec->transno = -1; /* trick */ + exec->state = to; + xmlFARegExecSave(exec); + exec->transno = transno; + exec->state = state; + } + codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), + 
len); + ret = xmlRegCheckCharacter(atom, codepoint); + exec->transcount++; + } while (ret == 1); + if (exec->transcount < atom->min) + ret = 0; + + /* + * If the last check failed but one transition was found + * possible, rollback + */ + if (ret < 0) + ret = 0; + if (ret == 0) { + goto rollback; + } + } + } + if (ret == 1) { + if (exec->state->nbTrans > exec->transno + 1) { + xmlFARegExecSave(exec); + } + if (trans->counter >= 0) { +#ifdef DEBUG_REGEXP_EXEC + printf("Increasing count %d\n", trans->counter); +#endif + exec->counts[trans->counter]++; + } +#ifdef DEBUG_REGEXP_EXEC + printf("entering state %d\n", trans->to); +#endif + exec->state = exec->comp->states[trans->to]; + exec->transno = 0; + if (trans->atom != NULL) { + exec->index += len; + } + goto progress; + } else if (ret < 0) { + exec->status = -4; + break; + } + } + if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { +rollback: + /* + * Failed to find a way out + */ + exec->determinist = 0; + xmlFARegExecRollBack(exec); + } +progress: + continue; + } +} +#endif +/************************************************************************ + * * + * Parser for the Shemas Datatype Regular Expressions * + * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * + * * + ************************************************************************/ + +/** + * xmlFAIsChar: + * ctxt: a regexp parser context + * + * [10] Char ::= [^.\?*+()|#x5B#x5D] + */ +static int +xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { + int cur; + int len; + + cur = CUR_SCHAR(ctxt->cur, len); + if ((cur == '.') || (cur == '\\') || (cur == '?') || + (cur == '*') || (cur == '+') || (cur == '(') || + (cur == ')') || (cur == '|') || (cur == 0x5B) || + (cur == 0x5D) || (cur == 0)) + return(-1); + return(cur); +} + +/** + * xmlFAParseCharProp: + * ctxt: a regexp parser context + * + * [27] charProp ::= IsCategory | IsBlock + * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | + * Separators | Symbols | Others + * [29] Letters 
::= 'L' [ultmo]? + * [30] Marks ::= 'M' [nce]? + * [31] Numbers ::= 'N' [dlo]? + * [32] Punctuation ::= 'P' [cdseifo]? + * [33] Separators ::= 'Z' [slp]? + * [34] Symbols ::= 'S' [mcko]? + * [35] Others ::= 'C' [cfon]? + * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ + */ +static void +xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { + int cur; + xmlRegAtomType type = 0; + xmlChar *blockName = NULL; + + cur = CUR; + if (cur == 'L') { + NEXT; + cur = CUR; + if (cur == 'u') { + NEXT; + type = XML_REGEXP_LETTER_UPPERCASE; + } else if (cur == 'l') { + NEXT; + type = XML_REGEXP_LETTER_LOWERCASE; + } else if (cur == 't') { + NEXT; + type = XML_REGEXP_LETTER_TITLECASE; + } else if (cur == 'm') { + NEXT; + type = XML_REGEXP_LETTER_MODIFIER; + } else if (cur == 'o') { + NEXT; + type = XML_REGEXP_LETTER_OTHERS; + } else { + type = XML_REGEXP_LETTER; + } + } else if (cur == 'M') { + NEXT; + cur = CUR; + if (cur == 'n') { + NEXT; + /* nonspacing */ + type = XML_REGEXP_MARK_NONSPACING; + } else if (cur == 'c') { + NEXT; + /* spacing combining */ + type = XML_REGEXP_MARK_SPACECOMBINING; + } else if (cur == 'e') { + NEXT; + /* enclosing */ + type = XML_REGEXP_MARK_ENCLOSING; + } else { + /* all marks */ + type = XML_REGEXP_MARK; + } + } else if (cur == 'N') { + NEXT; + cur = CUR; + if (cur == 'd') { + NEXT; + /* digital */ + type = XML_REGEXP_NUMBER_DECIMAL; + } else if (cur == 'l') { + NEXT; + /* letter */ + type = XML_REGEXP_NUMBER_LETTER; + } else if (cur == 'o') { + NEXT; + /* other */ + type = XML_REGEXP_NUMBER_OTHERS; + } else { + /* all numbers */ + type = XML_REGEXP_NUMBER; + } + } else if (cur == 'P') { + NEXT; + cur = CUR; + if (cur == 'c') { + NEXT; + /* connector */ + type = XML_REGEXP_PUNCT_CONNECTOR; + } else if (cur == 'd') { + NEXT; + /* dash */ + type = XML_REGEXP_PUNCT_DASH; + } else if (cur == 's') { + NEXT; + /* open */ + type = XML_REGEXP_PUNCT_OPEN; + } else if (cur == 'e') { + NEXT; + /* close */ + type = XML_REGEXP_PUNCT_CLOSE; + } else if (cur == 'i') { + NEXT; + 
/* initial quote */ + type = XML_REGEXP_PUNCT_INITQUOTE; + } else if (cur == 'f') { + NEXT; + /* final quote */ + type = XML_REGEXP_PUNCT_FINQUOTE; + } else if (cur == 'o') { + NEXT; + /* other */ + type = XML_REGEXP_PUNCT_OTHERS; + } else { + /* all punctuation */ + type = XML_REGEXP_PUNCT; + } + } else if (cur == 'Z') { + NEXT; + cur = CUR; + if (cur == 's') { + NEXT; + /* space */ + type = XML_REGEXP_SEPAR_SPACE; + } else if (cur == 'l') { + NEXT; + /* line */ + type = XML_REGEXP_SEPAR_LINE; + } else if (cur == 'p') { + NEXT; + /* paragraph */ + type = XML_REGEXP_SEPAR_PARA; + } else { + /* all separators */ + type = XML_REGEXP_SEPAR; + } + } else if (cur == 'S') { + NEXT; + cur = CUR; + if (cur == 'm') { + NEXT; + type = XML_REGEXP_SYMBOL_MATH; + /* math */ + } else if (cur == 'c') { + NEXT; + type = XML_REGEXP_SYMBOL_CURRENCY; + /* currency */ + } else if (cur == 'k') { + NEXT; + type = XML_REGEXP_SYMBOL_MODIFIER; + /* modifiers */ + } else if (cur == 'o') { + NEXT; + type = XML_REGEXP_SYMBOL_OTHERS; + /* other */ + } else { + /* all symbols */ + type = XML_REGEXP_SYMBOL; + } + } else if (cur == 'C') { + NEXT; + cur = CUR; + if (cur == 'c') { + NEXT; + /* control */ + type = XML_REGEXP_OTHER_CONTROL; + } else if (cur == 'f') { + NEXT; + /* format */ + type = XML_REGEXP_OTHER_FORMAT; + } else if (cur == 'o') { + NEXT; + /* private use */ + type = XML_REGEXP_OTHER_PRIVATE; + } else if (cur == 'n') { + NEXT; + /* not assigned */ + type = XML_REGEXP_OTHER_NA; + } else { + /* all others */ + type = XML_REGEXP_OTHER; + } + } else if (cur == 'I') { + const xmlChar *start; + NEXT; + cur = CUR; + if (cur != 's') { + ERROR("IsXXXX expected"); + return; + } + NEXT; + start = ctxt->cur; + cur = CUR; + if (((cur >= 'a') && (cur <= 'z')) || + ((cur >= 'A') && (cur <= 'Z')) || + ((cur >= '0') && (cur <= '9')) || + (cur == 0x2D)) { + NEXT; + cur = CUR; + while (((cur >= 'a') && (cur <= 'z')) || + ((cur >= 'A') && (cur <= 'Z')) || + ((cur >= '0') && (cur <= '9')) || + (cur == 
0x2D)) { + NEXT; + cur = CUR; + } + } + type = XML_REGEXP_BLOCK_NAME; + blockName = xmlStrndup(start, ctxt->cur - start); + } else { + ERROR("Unknown char property"); + return; + } + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, type); + if (ctxt->atom != NULL) + ctxt->atom->valuep = blockName; + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + type, 0, 0, blockName); + } +} + +/** + * xmlFAParseCharClassEsc: + * ctxt: a regexp parser context + * + * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) + * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] + * [25] catEsc ::= '\p{' charProp '}' + * [26] complEsc ::= '\P{' charProp '}' + * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) + */ +static void +xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { + int cur; + + if (CUR == '.') { + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR); + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_ANYCHAR, 0, 0, NULL); + } + NEXT; + return; + } + if (CUR != '\\') { + ERROR("Escaped sequence: expecting \\"); + return; + } + NEXT; + cur = CUR; + if (cur == 'p') { + NEXT; + if (CUR != '{') { + ERROR("Expecting '{'"); + return; + } + NEXT; + xmlFAParseCharProp(ctxt); + if (CUR != '}') { + ERROR("Expecting '}'"); + return; + } + NEXT; + } else if (cur == 'P') { + NEXT; + if (CUR != '{') { + ERROR("Expecting '{'"); + return; + } + NEXT; + xmlFAParseCharProp(ctxt); + ctxt->atom->neg = 1; + if (CUR != '}') { + ERROR("Expecting '}'"); + return; + } + NEXT; + } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') || + (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') || + (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || + (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) || + (cur == 0x5E)) { + if (ctxt->atom == NULL) { + ctxt->atom = 
xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); + if (ctxt->atom != NULL) + ctxt->atom->codepoint = cur; + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, cur, cur, NULL); + } + NEXT; + } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || + (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || + (cur == 'w') || (cur == 'W')) { + xmlRegAtomType type; + + switch (cur) { + case 's': + type = XML_REGEXP_ANYSPACE; + break; + case 'S': + type = XML_REGEXP_NOTSPACE; + break; + case 'i': + type = XML_REGEXP_INITNAME; + break; + case 'I': + type = XML_REGEXP_NOTINITNAME; + break; + case 'c': + type = XML_REGEXP_NAMECHAR; + break; + case 'C': + type = XML_REGEXP_NOTNAMECHAR; + break; + case 'd': + type = XML_REGEXP_DECIMAL; + break; + case 'D': + type = XML_REGEXP_NOTDECIMAL; + break; + case 'w': + type = XML_REGEXP_REALCHAR; + break; + case 'W': + type = XML_REGEXP_NOTREALCHAR; + break; + } + NEXT; + if (ctxt->atom == NULL) { + ctxt->atom = xmlRegNewAtom(ctxt, type); + } else if (ctxt->atom->type == XML_REGEXP_RANGES) { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + type, 0, 0, NULL); + } + } +} + +/** + * xmlFAParseCharRef: + * ctxt: a regexp parser context + * + * [19] XmlCharRef ::= ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' ) + */ +static int +xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) { + int ret = 0, cur; + + if ((CUR != '&') || (NXT(1) != '#')) + return(-1); + NEXT; + NEXT; + cur = CUR; + if (cur == 'x') { + NEXT; + cur = CUR; + if (((cur >= '0') && (cur <= '9')) || + ((cur >= 'a') && (cur <= 'f')) || + ((cur >= 'A') && (cur <= 'F'))) { + while (((cur >= '0') && (cur <= '9')) || + ((cur >= 'A') && (cur <= 'F'))) { + if ((cur >= '0') && (cur <= '9')) + ret = ret * 16 + cur - '0'; + else if ((cur >= 'a') && (cur <= 'f')) + ret = ret * 16 + 10 + (cur - 'a'); + else + ret = ret * 16 + 10 + (cur - 'A'); + NEXT; + cur = CUR; + } + } else { + ERROR("Char ref: 
expecting [0-9A-F]"); + return(-1); + } + } else { + if ((cur >= '0') && (cur <= '9')) { + while ((cur >= '0') && (cur <= '9')) { + ret = ret * 10 + cur - '0'; + NEXT; + cur = CUR; + } + } else { + ERROR("Char ref: expecting [0-9]"); + return(-1); + } + } + if (cur != ';') { + ERROR("Char ref: expecting ';'"); + return(-1); + } else { + NEXT; + } + return(ret); +} + +/** + * xmlFAParseCharRange: + * ctxt: a regexp parser context + * + * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash + * [18] seRange ::= charOrEsc '-' charOrEsc + * [20] charOrEsc ::= XmlChar | SingleCharEsc + * [21] XmlChar ::= [^\#x2D#x5B#x5D] + * [22] XmlCharIncDash ::= [^\#x5B#x5D] + */ +static void +xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { + int cur; + int start = -1; + int end = -1; + + if ((CUR == '&') && (NXT(1) == '#')) { + end = start = xmlFAParseCharRef(ctxt); + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, start, end, NULL); + return; + } + cur = CUR; + if (cur == '\\') { + NEXT; + cur = CUR; + switch (cur) { + case 'n': start = 0xA; break; + case 'r': start = 0xD; break; + case 't': start = 0x9; break; + case '\\': case '|': case '.': case '-': case '^': case '?': + case '*': case '+': case '{': case '}': case '(': case ')': + case '[': case ']': + start = cur; break; + default: + ERROR("Invalid escape value"); + return; + } + end = start; + } else if ((cur != 0x5B) && (cur != 0x5D)) { + end = start = cur; + } else { + ERROR("Expecting a char range"); + return; + } + NEXT; + if (start == '-') { + return; + } + cur = CUR; + if (cur != '-') { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, start, end, NULL); + return; + } + NEXT; + cur = CUR; + if (cur == '\\') { + NEXT; + cur = CUR; + switch (cur) { + case 'n': end = 0xA; break; + case 'r': end = 0xD; break; + case 't': end = 0x9; break; + case '\\': case '|': case '.': case '-': case '^': case '?': + case '*': case '+': case '{': case '}': case '(': case ')': + case '[': 
case ']': + end = cur; break; + default: + ERROR("Invalid escape value"); + return; + } + } else if ((cur != 0x5B) && (cur != 0x5D)) { + end = cur; + } else { + ERROR("Expecting the end of a char range"); + return; + } + NEXT; + /* TODO check that the values are acceptable character ranges for XML */ + if (end < start) { + ERROR("End of range is before start of range"); + } else { + xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, + XML_REGEXP_CHARVAL, start, end, NULL); + } + return; +} + +/** + * xmlFAParsePosCharGroup: + * ctxt: a regexp parser context + * + * [14] posCharGroup ::= ( charRange | charClassEsc )+ + */ +static void +xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) { + do { + if ((CUR == '\\') || (CUR == '.')) { + xmlFAParseCharClassEsc(ctxt); + } else { + xmlFAParseCharRange(ctxt); + } + } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && + (ctxt->error == 0)); +} + +/** + * xmlFAParseCharGroup: + * ctxt: a regexp parser context + * + * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub + * [15] negCharGroup ::= '^' posCharGroup + * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr + * [12] charClassExpr ::= '[' charGroup ']' + */ +static void +xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { + int n = ctxt->neg; + while ((CUR != ']') && (ctxt->error == 0)) { + if (CUR == '^') { + int neg = ctxt->neg; + + NEXT; + ctxt->neg = !ctxt->neg; + xmlFAParsePosCharGroup(ctxt); + ctxt->neg = neg; + } else if (CUR == '-') { + NEXT; + ctxt->neg = !ctxt->neg; + if (CUR != '[') { + ERROR("charClassExpr: '[' expected"); + break; + } + NEXT; + xmlFAParseCharGroup(ctxt); + if (CUR == ']') { + NEXT; + } else { + ERROR("charClassExpr: ']' expected"); + break; + } + break; + } else if (CUR != ']') { + xmlFAParsePosCharGroup(ctxt); + } + } + ctxt->neg = n; +} + +/** + * xmlFAParseCharClass: + * ctxt: a regexp parser context + * + * [11] charClass ::= charClassEsc | charClassExpr + * [12] charClassExpr ::= '[' charGroup ']' + */ 
+static void +xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) { + if (CUR == '[') { + NEXT; + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES); + if (ctxt->atom == NULL) + return; + xmlFAParseCharGroup(ctxt); + if (CUR == ']') { + NEXT; + } else { + ERROR("xmlFAParseCharClass: ']' expected"); + } + } else { + xmlFAParseCharClassEsc(ctxt); + } +} + +/** + * xmlFAParseQuantExact: + * ctxt: a regexp parser context + * + * [8] QuantExact ::= [0-9]+ + */ +static int +xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) { + int ret = 0; + int ok = 0; + + while ((CUR >= '0') && (CUR <= '9')) { + ret = ret * 10 + (CUR - '0'); + ok = 1; + NEXT; + } + if (ok != 1) { + return(-1); + } + return(ret); +} + +/** + * xmlFAParseQuantifier: + * ctxt: a regexp parser context + * + * [4] quantifier ::= [?*+] | ( '{' quantity '}' ) + * [5] quantity ::= quantRange | quantMin | QuantExact + * [6] quantRange ::= QuantExact ',' QuantExact + * [7] quantMin ::= QuantExact ',' + * [8] QuantExact ::= [0-9]+ + */ +static int +xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) { + int cur; + + cur = CUR; + if ((cur == '?') || (cur == '*') || (cur == '+')) { + if (ctxt->atom != NULL) { + if (cur == '?') + ctxt->atom->quant = XML_REGEXP_QUANT_OPT; + else if (cur == '*') + ctxt->atom->quant = XML_REGEXP_QUANT_MULT; + else if (cur == '+') + ctxt->atom->quant = XML_REGEXP_QUANT_PLUS; + } + NEXT; + return(1); + } + if (cur == '{') { + int min = 0, max = 0; + + NEXT; + cur = xmlFAParseQuantExact(ctxt); + if (cur >= 0) + min = cur; + if (CUR == ',') { + NEXT; + cur = xmlFAParseQuantExact(ctxt); + if (cur >= 0) + max = cur; + } + if (CUR == '}') { + NEXT; + } else { + ERROR("Unterminated quantifier"); + } + if (max == 0) + max = min; + if (ctxt->atom != NULL) { + ctxt->atom->quant = XML_REGEXP_QUANT_RANGE; + ctxt->atom->min = min; + ctxt->atom->max = max; + } + return(1); + } + return(0); +} + +/** + * xmlFAParseAtom: + * ctxt: a regexp parser context + * + * [9] atom ::= Char | charClass | ( '(' regExp ')' ) + 
*/ +static int +xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { + int codepoint, len; + + codepoint = xmlFAIsChar(ctxt); + if (codepoint > 0) { + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); + if (ctxt->atom == NULL) + return(-1); + codepoint = CUR_SCHAR(ctxt->cur, len); + ctxt->atom->codepoint = codepoint; + NEXTL(len); + return(1); + } else if (CUR == '|') { + return(0); + } else if (CUR == 0) { + return(0); + } else if (CUR == ')') { + return(0); + } else if (CUR == '(') { + xmlRegStatePtr start, oldend; + + NEXT; + xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); + start = ctxt->state; + oldend = ctxt->end; + ctxt->end = NULL; + ctxt->atom = NULL; + xmlFAParseRegExp(ctxt, 0); + if (CUR == ')') { + NEXT; + } else { + ERROR("xmlFAParseAtom: expecting ')'"); + } + ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG); + if (ctxt->atom == NULL) + return(-1); + ctxt->atom->start = start; + ctxt->atom->stop = ctxt->state; + ctxt->end = oldend; + return(1); + } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) { + xmlFAParseCharClass(ctxt); + return(1); + } + return(0); +} + +/** + * xmlFAParsePiece: + * ctxt: a regexp parser context + * + * [3] piece ::= atom quantifier? 
+ */ +static int +xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { + int ret; + + ctxt->atom = NULL; + ret = xmlFAParseAtom(ctxt); + if (ret == 0) + return(0); + if (ctxt->atom == NULL) { + ERROR("internal: no atom generated"); + } + xmlFAParseQuantifier(ctxt); + return(1); +} + +/** + * xmlFAParseBranch: + * ctxt: a regexp parser context + * first: is taht the first + * + * [2] branch ::= piece* + */ +static void +xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) { + xmlRegStatePtr previous; + xmlRegAtomPtr prevatom = NULL; + int ret; + + previous = ctxt->state; + ret = xmlFAParsePiece(ctxt); + if (ret != 0) { + if (first) { + xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom); + previous = ctxt->state; + } else { + prevatom = ctxt->atom; + } + ctxt->atom = NULL; + } + while ((ret != 0) && (ctxt->error == 0)) { + ret = xmlFAParsePiece(ctxt); + if (ret != 0) { + if (first) { + xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom); + } else { + xmlFAGenerateTransitions(ctxt, previous, NULL, prevatom); + prevatom = ctxt->atom; + } + previous = ctxt->state; + ctxt->atom = NULL; + } + } + if (!first) { + xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom); + } +} + +/** + * xmlFAParseRegExp: + * ctxt: a regexp parser context + * top: is that the top-level expressions ? 
+ * + * [1] regExp ::= branch ( '|' branch )* + */ +static void +xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { + xmlRegStatePtr start, end, oldend; + + oldend = ctxt->end; + + start = ctxt->state; + xmlFAParseBranch(ctxt, (ctxt->end == NULL)); + if (CUR != '|') { + ctxt->end = ctxt->state; + return; + } + end = ctxt->state; + while ((CUR == '|') && (ctxt->error == 0)) { + NEXT; + ctxt->state = start; + ctxt->end = end; + xmlFAParseBranch(ctxt, 0); + } + if (!top) + ctxt->end = oldend; +} + +/************************************************************************ + * * + * The basic API * + * * + ************************************************************************/ + +/** + * xmlRegexpPrint: + * @output: the file for the output debug + * @regexp: the compiled regexp + * + * Print the content of the compiled regular expression + */ +void +xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) { + int i; + + fprintf(output, " regexp: "); + if (regexp == NULL) { + fprintf(output, "NULL\n"); + return; + } + fprintf(output, "'%s' ", regexp->string); + fprintf(output, "\n"); + fprintf(output, "%d atoms:\n", regexp->nbAtoms); + for (i = 0;i < regexp->nbAtoms; i++) { + fprintf(output, " %02d ", i); + xmlRegPrintAtom(output, regexp->atoms[i]); + } + fprintf(output, "%d states:", regexp->nbStates); + fprintf(output, "\n"); + for (i = 0;i < regexp->nbStates; i++) { + xmlRegPrintState(output, regexp->states[i]); + } + fprintf(output, "%d counters:\n", regexp->nbCounters); + for (i = 0;i < regexp->nbCounters; i++) { + fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min, + regexp->counters[i].max); + } +} + +/** + * xmlRegexpCompile: + * @regexp: a regular expression string + * + * Parses a regular expression conforming to XML Schemas Part 2 Datatype + * Appendix F and build an automata suitable for testing strings against + * that regular expression + * + * Returns the compiled expression or NULL in case of error + */ +xmlRegexpPtr +xmlRegexpCompile(const 
xmlChar *regexp) { + xmlRegexpPtr ret; + xmlRegParserCtxtPtr ctxt; + + ctxt = xmlRegNewParserCtxt(regexp); + if (ctxt == NULL) + return(NULL); + + /* initialize the parser */ + ctxt->end = NULL; + ctxt->start = ctxt->state = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, ctxt->start); + + /* parse the expression building an automata */ + xmlFAParseRegExp(ctxt, 1); + if (CUR != 0) { + ERROR("xmlFAParseRegExp: extra characters"); + } + ctxt->end = ctxt->state; + ctxt->start->type = XML_REGEXP_START_STATE; + ctxt->end->type = XML_REGEXP_FINAL_STATE; + + /* remove the Epsilon except for counted transitions */ + xmlFAEliminateEpsilonTransitions(ctxt); + + + if (ctxt->error != 0) { + xmlRegFreeParserCtxt(ctxt); + return(NULL); + } + ret = xmlRegEpxFromParse(ctxt); + xmlRegFreeParserCtxt(ctxt); + return(ret); +} + +/** + * xmlRegexpExec: + * @comp: the compiled regular expression + * @content: the value to check against the regular expression + * + * Check if the regular expression generate the value + * + * Returns 1 if it matches, 0 if not and a negativa value in case of error + */ +int +xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) { + if ((comp == NULL) || (content == NULL)) + return(-1); + return(xmlFARegExec(comp, content)); +} + +/** + * xmlRegFreeRegexp: + * @regexp: the regexp + * + * Free a regexp + */ +void +xmlRegFreeRegexp(xmlRegexpPtr regexp) { + int i; + if (regexp == NULL) + return; + + if (regexp->string != NULL) + xmlFree(regexp->string); + if (regexp->states != NULL) { + for (i = 0;i < regexp->nbStates;i++) + xmlRegFreeState(regexp->states[i]); + xmlFree(regexp->states); + } + if (regexp->atoms != NULL) { + for (i = 0;i < regexp->nbAtoms;i++) + xmlRegFreeAtom(regexp->atoms[i]); + xmlFree(regexp->atoms); + } + if (regexp->counters != NULL) + xmlFree(regexp->counters); + xmlFree(regexp); +} + +#ifdef LIBXML_AUTOMATA_ENABLED +/************************************************************************ + * * + * The Automata interface * + * * + 
************************************************************************/ + +/** + * xmlNewAutomata: + * + * Create a new automata + * + * Returns the new object or NULL in case of failure + */ +xmlAutomataPtr +xmlNewAutomata(void) { + xmlAutomataPtr ctxt; + + ctxt = xmlRegNewParserCtxt(NULL); + if (ctxt == NULL) + return(NULL); + + /* initialize the parser */ + ctxt->end = NULL; + ctxt->start = ctxt->state = xmlRegNewState(ctxt); + xmlRegStatePush(ctxt, ctxt->start); + + return(ctxt); +} + +/** + * xmlFreeAutomata: + * @am: an automata + * + * Free an automata + */ +void +xmlFreeAutomata(xmlAutomataPtr am) { + if (am == NULL) + return; + xmlRegFreeParserCtxt(am); +} + +/** + * xmlAutomataGetInitState: + * @am: an automata + * + * Returns the initial state of the automata + */ +xmlAutomataStatePtr +xmlAutomataGetInitState(xmlAutomataPtr am) { + if (am == NULL) + return(NULL); + return(am->start); +} + +/** + * xmlAutomataSetFinalState: + * @am: an automata + * @state: a state in this automata + * + * Makes that state a final state + * + * Returns 0 or -1 in case of error + */ +int +xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) { + if ((am == NULL) || (state == NULL)) + return(-1); + state->type = XML_REGEXP_FINAL_STATE; + return(0); +} + +/** + * xmlAutomataNewTransition: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * @token: the input string associated to that transition + * @data: data passed to the callback function if the transition is activated + * + * If @to is NULL, this create first a new target state in the automata + * and then adds a transition from the @from state to the target state + * activated by the value of @token + * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to, const xmlChar *token, + void *data) { + xmlRegAtomPtr 
atom; + + if ((am == NULL) || (from == NULL) || (token == NULL)) + return(NULL); + atom = xmlRegNewAtom(am, XML_REGEXP_STRING); + atom->data = data; + if (atom == NULL) + return(NULL); + atom->valuep = xmlStrdup(token); + + xmlFAGenerateTransitions(am, from, to, atom); + if (to == NULL) + return(am->state); + return(to); +} + +/** + * xmlAutomataNewCountTrans: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * @token: the input string associated to that transition + * @min: the minimum successive occurences of token + * @min: the maximum successive occurences of token + * + * If @to is NULL, this create first a new target state in the automata + * and then adds a transition from the @from state to the target state + * activated by a succession of input of value @token and whose number + * is between @min and @max + * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to, const xmlChar *token, + int min, int max, void *data) { + xmlRegAtomPtr atom; + + if ((am == NULL) || (from == NULL) || (token == NULL)) + return(NULL); + if (min < 0) + return(NULL); + if ((max < min) || (max < 1)) + return(NULL); + atom = xmlRegNewAtom(am, XML_REGEXP_STRING); + if (atom == NULL) + return(NULL); + atom->valuep = xmlStrdup(token); + atom->data = data; + if (min == 0) + atom->min = 1; + else + atom->min = min; + atom->max = max; + + xmlFAGenerateTransitions(am, from, to, atom); + if (to == NULL) + to = am->state; + if (to == NULL) + return(NULL); + if (min == 0) + xmlFAGenerateEpsilonTransition(am, from, to); + return(to); +} + +/** + * xmlAutomataNewState: + * @am: an automata + * + * Create a new disconnected state in the automata + * + * Returns the new state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewState(xmlAutomataPtr am) { + xmlAutomataStatePtr to; + + if (am 
== NULL) + return(NULL); + to = xmlRegNewState(am); + xmlRegStatePush(am, to); + return(to); +} + +/** + * xmlAutomataNewTransition: + * @am: an automata + * @from: the starting point of the transition + * @to: the target point of the transition or NULL + * + * If @to is NULL, this create first a new target state in the automata + * and then adds a an epsilon transition from the @from state to the + * target state + * + * Returns the target state or NULL in case of error + */ +xmlAutomataStatePtr +xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from, + xmlAutomataStatePtr to) { + if ((am == NULL) || (from == NULL)) + return(NULL); + xmlFAGenerateEpsilonTransition(am, from, to); + if (to == NULL) + return(am->state); + return(to); +} + +#if 0 +int xmlAutomataNewCounter (xmlAutomataPtr am); +#endif + +/** + * xmlAutomataCompile: + * @am: an automata + * + * Compile the automata into a Reg Exp ready for being executed. + * The automata should be free after this point. + * + * Returns the compiled regexp or NULL in case of error + */ +xmlRegexpPtr +xmlAutomataCompile(xmlAutomataPtr am) { + xmlRegexpPtr ret; + + xmlFAEliminateEpsilonTransitions(am); + ret = xmlRegEpxFromParse(am); + + return(ret); +} +#endif /* LIBXML_AUTOMATA_ENABLED */ +#endif /* LIBXML_REGEXP_ENABLED */ diff --git a/xmlschemas.c b/xmlschemas.c new file mode 100644 index 00000000..f2110959 --- /dev/null +++ b/xmlschemas.c @@ -0,0 +1,4941 @@ +/* + * schemas.c : implementation of the XML Schema handling and + * schema validity checking + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define DEBUG 1 /* very verobose output */ +/* #define DEBUG_CONTENT 1 */ +/* #define DEBUG_AUTOMATA 1 */ + +#define UNBOUNDED (1 << 30) +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +/* + * The XML Schemas namespaces + */ +static const xmlChar *xmlSchemaNs = (const xmlChar *) + "http://www.w3.org/2001/XMLSchema"; + +static const xmlChar *xmlSchemaInstanceNs = (const xmlChar *) + "http://www.w3.org/2001/XMLSchema-instance"; + +#define IS_SCHEMA(node, type) \ + ((node != NULL) && (node->ns != NULL) && \ + (xmlStrEqual(node->name, (const xmlChar *) type)) && \ + (xmlStrEqual(node->ns->href, xmlSchemaNs))) + +#define XML_SCHEMAS_PARSE_ERROR 1 + +struct _xmlSchemaParserCtxt { + void *userData; /* user specific data block */ + xmlSchemaValidityErrorFunc error; /* the callback in case of errors */ + xmlSchemaValidityWarningFunc warning;/* the callback in case of warning */ + + xmlSchemaPtr schema; /* The schema in use */ + xmlChar *container; /* the current element, group, ... 
*/ + int counter; + + xmlChar *URL; + xmlDocPtr doc; + + /* + * Used to build complex element content models + */ + xmlAutomataPtr am; + xmlAutomataStatePtr start; + xmlAutomataStatePtr end; + xmlAutomataStatePtr state; +}; + + +#define XML_SCHEMAS_ATTR_UNKNOWN 1 +#define XML_SCHEMAS_ATTR_CHECKED 2 + +typedef struct _xmlSchemaAttrState xmlSchemaAttrState; +typedef xmlSchemaAttrState *xmlSchemaAttrStatePtr; +struct _xmlSchemaAttrState { + xmlAttrPtr attr; + int state; +}; + +/** + * xmlSchemaValidCtxt: + * + * A Schemas validation context + */ + +struct _xmlSchemaValidCtxt { + void *userData; /* user specific data block */ + xmlSchemaValidityErrorFunc error; /* the callback in case of errors */ + xmlSchemaValidityWarningFunc warning;/* the callback in case of warning */ + + xmlSchemaPtr schema; /* The schema in use */ + xmlDocPtr doc; + xmlParserInputBufferPtr input; + xmlCharEncoding enc; + xmlSAXHandlerPtr sax; + void *user_data; + + xmlDocPtr myDoc; + int err; + + xmlNodePtr node; + xmlSchemaTypePtr type; + + xmlRegExecCtxtPtr regexp; + xmlSchemaValPtr value; + + int attrNr; + int attrBase; + int attrMax; + xmlSchemaAttrStatePtr attr; +}; + + +/************************************************************************ + * * + * Some predeclarations * + * * + ************************************************************************/ +static int xmlSchemaValidateSimpleValue(xmlSchemaValidCtxtPtr ctxt, + xmlSchemaTypePtr type, + xmlChar *value); + +/************************************************************************ + * * + * Allocation functions * + * * + ************************************************************************/ + +/** + * xmlSchemaNewSchema: + * @ctxt: a schema validation context (optional) + * + * Allocate a new Schema structure. 
+ * + * Returns the newly allocated structure or NULL in case or error + */ +static xmlSchemaPtr +xmlSchemaNewSchema(xmlSchemaParserCtxtPtr ctxt) +{ + xmlSchemaPtr ret; + + ret = (xmlSchemaPtr) xmlMalloc(sizeof(xmlSchema)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchema)); + + return (ret); +} + +/** + * xmlSchemaNewFacet: + * @ctxt: a schema validation context (optional) + * + * Allocate a new Facet structure. + * + * Returns the newly allocated structure or NULL in case or error + */ +static xmlSchemaFacetPtr +xmlSchemaNewFacet(xmlSchemaParserCtxtPtr ctxt) +{ + xmlSchemaFacetPtr ret; + + ret = (xmlSchemaFacetPtr) xmlMalloc(sizeof(xmlSchemaFacet)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaFacet)); + + return (ret); +} + +/** + * xmlSchemaNewAnnot: + * @ctxt: a schema validation context (optional) + * @node: a node + * + * Allocate a new annotation structure. + * + * Returns the newly allocated structure or NULL in case or error + */ +static xmlSchemaAnnotPtr +xmlSchemaNewAnnot(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) +{ + xmlSchemaAnnotPtr ret; + + ret = (xmlSchemaAnnotPtr) xmlMalloc(sizeof(xmlSchemaAnnot)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaAnnot)); + ret->content = node; + return (ret); +} + +/** + * xmlSchemaFreeNotation: + * @schema: a schema notation structure + * + * Deallocate a Schema Notation structure. 
+ */ +static void +xmlSchemaFreeNotation(xmlSchemaNotationPtr nota) +{ + if (nota == NULL) + return; + if (nota->name != NULL) + xmlFree((xmlChar *) nota->name); + xmlFree(nota); +} + +/** + * xmlSchemaFreeAttribute: + * @schema: a schema attribute structure + * + * Deallocate a Schema Attribute structure. + */ +static void +xmlSchemaFreeAttribute(xmlSchemaAttributePtr attr) +{ + if (attr == NULL) + return; + if (attr->name != NULL) + xmlFree((xmlChar *) attr->name); + if (attr->ref != NULL) + xmlFree((xmlChar *) attr->ref); + if (attr->refNs != NULL) + xmlFree((xmlChar *) attr->refNs); + if (attr->typeName != NULL) + xmlFree((xmlChar *) attr->typeName); + if (attr->typeNs != NULL) + xmlFree((xmlChar *) attr->typeNs); + xmlFree(attr); +} + +/** + * xmlSchemaFreeAttributeGroup: + * @schema: a schema attribute group structure + * + * Deallocate a Schema Attribute Group structure. + */ +static void +xmlSchemaFreeAttributeGroup(xmlSchemaAttributeGroupPtr attr) +{ + if (attr == NULL) + return; + if (attr->name != NULL) + xmlFree((xmlChar *) attr->name); + xmlFree(attr); +} + +/** + * xmlSchemaFreeElement: + * @schema: a schema element structure + * + * Deallocate a Schema Element structure. + */ +static void +xmlSchemaFreeElement(xmlSchemaElementPtr elem) +{ + if (elem == NULL) + return; + if (elem->name != NULL) + xmlFree((xmlChar *) elem->name); + if (elem->namedType != NULL) + xmlFree((xmlChar *) elem->namedType); + if (elem->namedTypeNs != NULL) + xmlFree((xmlChar *) elem->namedTypeNs); + if (elem->ref != NULL) + xmlFree((xmlChar *) elem->ref); + if (elem->refNs != NULL) + xmlFree((xmlChar *) elem->refNs); + if (elem->contModel != NULL) + xmlRegFreeRegexp(elem->contModel); + xmlFree(elem); +} + +/** + * xmlSchemaFreeFacet: + * @facet: a schema facet structure + * + * Deallocate a Schema Facet structure. 
+ */ +static void +xmlSchemaFreeFacet(xmlSchemaFacetPtr facet) +{ + if (facet == NULL) + return; + if (facet->value != NULL) + xmlFree((xmlChar *) facet->value); + if (facet->id != NULL) + xmlFree((xmlChar *) facet->id); + if (facet->val != NULL) + xmlSchemaFreeValue(facet->val); + if (facet->regexp != NULL) + xmlRegFreeRegexp(facet->regexp); + xmlFree(facet); +} + +/** + * xmlSchemaFreeType: + * @type: a schema type structure + * + * Deallocate a Schema Type structure. + */ +void +xmlSchemaFreeType(xmlSchemaTypePtr type) +{ + if (type == NULL) + return; + if (type->name != NULL) + xmlFree((xmlChar *) type->name); + if (type->base != NULL) + xmlFree((xmlChar *) type->base); + if (type->baseNs != NULL) + xmlFree((xmlChar *) type->baseNs); + if (type->annot != NULL) + xmlFree((xmlChar *) type->annot); + if (type->facets != NULL) { + xmlSchemaFacetPtr facet, next; + + facet = type->facets; + while (facet != NULL) { + next = facet->next; + xmlSchemaFreeFacet(facet); + facet = next; + } + } + xmlFree(type); +} + +/** + * xmlSchemaFreeAnnot: + * @annot: a schema type structure + * + * Deallocate a annotation structure + */ +static void +xmlSchemaFreeAnnot(xmlSchemaAnnotPtr annot) +{ + if (annot == NULL) + return; + xmlFree(annot); +} + +/** + * xmlSchemaFree: + * @schema: a schema structure + * + * Deallocate a Schema structure. 
+ */ +void +xmlSchemaFree(xmlSchemaPtr schema) +{ + if (schema == NULL) + return; + + if (schema->name != NULL) + xmlFree((xmlChar *) schema->name); + if (schema->notaDecl != NULL) + xmlHashFree(schema->notaDecl, + (xmlHashDeallocator) xmlSchemaFreeNotation); + if (schema->attrDecl != NULL) + xmlHashFree(schema->attrDecl, + (xmlHashDeallocator) xmlSchemaFreeAttribute); + if (schema->attrgrpDecl != NULL) + xmlHashFree(schema->attrgrpDecl, + (xmlHashDeallocator) xmlSchemaFreeAttributeGroup); + if (schema->elemDecl != NULL) + xmlHashFree(schema->elemDecl, + (xmlHashDeallocator) xmlSchemaFreeElement); + if (schema->typeDecl != NULL) + xmlHashFree(schema->typeDecl, + (xmlHashDeallocator) xmlSchemaFreeType); + if (schema->annot != NULL) + xmlSchemaFreeAnnot(schema->annot); + if (schema->doc != NULL) + xmlFreeDoc(schema->doc); + + xmlFree(schema); +} + +/************************************************************************ + * * + * Error functions * + * * + ************************************************************************/ + +/** + * xmlSchemaErrorContext: + * @ctxt: the parsing context + * @schema: the schema being built + * @node: the node being processed + * @child: the child being processed + * + * Dump a SchemaType structure + */ +static void +xmlSchemaErrorContext(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node, xmlNodePtr child) +{ + int line = 0; + const xmlChar *file = NULL; + const xmlChar *name = NULL; + const char *type = "error"; + + if ((ctxt == NULL) || (ctxt->error == NULL)) + return; + + if (child != NULL) + node = child; + + if (node != NULL) { + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE)) { + xmlDocPtr doc = (xmlDocPtr) node; + + file = doc->URL; + } else { + /* + * Try to find contextual informations to report + */ + if (node->type == XML_ELEMENT_NODE) { + line = (int) node->content; + } else if ((node->prev != NULL) && + (node->prev->type == XML_ELEMENT_NODE)) { + line = (int) 
node->prev->content; + } else if ((node->parent != NULL) && + (node->parent->type == XML_ELEMENT_NODE)) { + line = (int) node->parent->content; + } + if ((node->doc != NULL) && (node->doc->URL != NULL)) + file = node->doc->URL; + if (node->name != NULL) + name = node->name; + } + } + + if (ctxt != NULL) + type = "compilation error"; + else if (schema != NULL) + type = "runtime error"; + + if ((file != NULL) && (line != 0) && (name != NULL)) + ctxt->error(ctxt->userData, "%s: file %s line %d element %s\n", + type, file, line, name); + else if ((file != NULL) && (name != NULL)) + ctxt->error(ctxt->userData, "%s: file %s element %s\n", + type, file, name); + else if ((file != NULL) && (line != 0)) + ctxt->error(ctxt->userData, "%s: file %s line %d\n", type, file, line); + else if (file != NULL) + ctxt->error(ctxt->userData, "%s: file %s\n", type, file); + else if (name != NULL) + ctxt->error(ctxt->userData, "%s: element %s\n", type, name); + else + ctxt->error(ctxt->userData, "%s\n", type); +} + +/************************************************************************ + * * + * Debug functions * + * * + ************************************************************************/ + +/** + * xmlSchemaElementDump: + * @elem: an element + * @output: the file output + * + * Dump the element + */ +static void +xmlSchemaElementDump(xmlSchemaElementPtr elem, FILE * output, + ATTRIBUTE_UNUSED const xmlChar *name, + ATTRIBUTE_UNUSED const xmlChar *context, + ATTRIBUTE_UNUSED const xmlChar *namespace) +{ + if (elem == NULL) + return; + + fprintf(output, "Element "); + if (elem->flags & XML_SCHEMAS_ELEM_TOPLEVEL) + fprintf(output, "toplevel "); + fprintf(output, ": %s ", elem->name); + if (namespace != NULL) + fprintf(output, "namespace '%s' ", namespace); + + if (elem->flags & XML_SCHEMAS_ELEM_NILLABLE) + fprintf(output, "nillable "); + if (elem->flags & XML_SCHEMAS_ELEM_GLOBAL) + fprintf(output, "global "); + if (elem->flags & XML_SCHEMAS_ELEM_DEFAULT) + fprintf(output, "default 
"); + if (elem->flags & XML_SCHEMAS_ELEM_FIXED) + fprintf(output, "fixed "); + if (elem->flags & XML_SCHEMAS_ELEM_ABSTRACT) + fprintf(output, "abstract "); + if (elem->flags & XML_SCHEMAS_ELEM_REF) + fprintf(output, "ref '%s' ", elem->ref); + if (elem->id != NULL) + fprintf(output, "id '%s' ", elem->id); + fprintf(output, "\n"); + if ((elem->minOccurs != 1) || (elem->maxOccurs != 1)) { + fprintf(output, " "); + if (elem->minOccurs != 1) + fprintf(output, "min: %d ", elem->minOccurs); + if (elem->maxOccurs >= UNBOUNDED) + fprintf(output, "max: unbounded\n"); + else if (elem->maxOccurs != 1) + fprintf(output, "max: %d\n", elem->maxOccurs); + else + fprintf(output, "\n"); + } + if (elem->namedType != NULL) { + fprintf(output, " type: %s", elem->namedType); + if (elem->namedTypeNs != NULL) + fprintf(output, " ns %s\n", elem->namedTypeNs); + else + fprintf(output, "\n"); + } + if (elem->substGroup != NULL) { + fprintf(output, " substitutionGroup: %s", elem->substGroup); + if (elem->substGroupNs != NULL) + fprintf(output, " ns %s\n", elem->substGroupNs); + else + fprintf(output, "\n"); + } + if (elem->value != NULL) + fprintf(output, " default: %s", elem->value); +} + +/** + * xmlSchemaAnnotDump: + * @output: the file output + * @annot: a annotation + * + * Dump the annotation + */ +static void +xmlSchemaAnnotDump(FILE * output, xmlSchemaAnnotPtr annot) +{ + xmlChar *content; + + if (annot == NULL) + return; + + content = xmlNodeGetContent(annot->content); + if (content != NULL) { + fprintf(output, " Annot: %s\n", content); + xmlFree(content); + } else + fprintf(output, " Annot: empty\n"); +} + +/** + * xmlSchemaTypeDump: + * @output: the file output + * @type: a type structure + * + * Dump a SchemaType structure + */ +static void +xmlSchemaTypeDump(xmlSchemaTypePtr type, FILE * output) +{ + if (type == NULL) { + fprintf(output, "Type: NULL\n"); + return; + } + fprintf(output, "Type: "); + if (type->name != NULL) + fprintf(output, "%s, ", type->name); + else + 
fprintf(output, "no name"); + switch (type->type) { + case XML_SCHEMA_TYPE_BASIC: + fprintf(output, "basic "); + break; + case XML_SCHEMA_TYPE_SIMPLE: + fprintf(output, "simple "); + break; + case XML_SCHEMA_TYPE_COMPLEX: + fprintf(output, "complex "); + break; + case XML_SCHEMA_TYPE_SEQUENCE: + fprintf(output, "sequence "); + break; + case XML_SCHEMA_TYPE_CHOICE: + fprintf(output, "choice "); + break; + case XML_SCHEMA_TYPE_ALL: + fprintf(output, "all "); + break; + case XML_SCHEMA_TYPE_UR: + fprintf(output, "ur "); + break; + case XML_SCHEMA_TYPE_RESTRICTION: + fprintf(output, "restriction "); + break; + case XML_SCHEMA_TYPE_EXTENSION: + fprintf(output, "extension "); + break; + default: + fprintf(output, "unknowntype%d ", type->type); + break; + } + if (type->base != NULL) { + fprintf(output, "base %s, ", type->base); + } + switch (type->contentType) { + case XML_SCHEMA_CONTENT_UNKNOWN: + fprintf(output, "unknown "); + break; + case XML_SCHEMA_CONTENT_EMPTY: + fprintf(output, "empty "); + break; + case XML_SCHEMA_CONTENT_ELEMENTS: + fprintf(output, "element "); + break; + case XML_SCHEMA_CONTENT_MIXED: + fprintf(output, "mixed "); + break; + case XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS: + fprintf(output, "mixed_or_elems "); + break; + case XML_SCHEMA_CONTENT_BASIC: + fprintf(output, "basic "); + break; + case XML_SCHEMA_CONTENT_SIMPLE: + fprintf(output, "simple "); + break; + } + fprintf(output, "\n"); + if ((type->minOccurs != 1) || (type->maxOccurs != 1)) { + fprintf(output, " "); + if (type->minOccurs != 1) + fprintf(output, "min: %d ", type->minOccurs); + if (type->maxOccurs >= UNBOUNDED) + fprintf(output, "max: unbounded\n"); + else if (type->maxOccurs != 1) + fprintf(output, "max: %d\n", type->maxOccurs); + else + fprintf(output, "\n"); + } + if (type->annot != NULL) + xmlSchemaAnnotDump(output, type->annot); + if (type->subtypes != NULL) { + xmlSchemaTypePtr sub = type->subtypes; + + fprintf(output, " subtypes: "); + while (sub != NULL) { + fprintf(output, 
"%s ", sub->name); + sub = sub->next; + } + fprintf(output, "\n"); + } + +} + +/** + * xmlSchemaDump: + * @output: the file output + * @schema: a schema structure + * + * Dump a Schema structure. + */ +void +xmlSchemaDump(FILE * output, xmlSchemaPtr schema) +{ + if (schema == NULL) { + fprintf(output, "Schemas: NULL\n"); + return; + } + fprintf(output, "Schemas: "); + if (schema->name != NULL) + fprintf(output, "%s, ", schema->name); + else + fprintf(output, "no name, "); + if (schema->targetNamespace != NULL) + fprintf(output, "%s", schema->targetNamespace); + else + fprintf(output, "no target namespace"); + fprintf(output, "\n"); + if (schema->annot != NULL) + xmlSchemaAnnotDump(output, schema->annot); + + xmlHashScan(schema->typeDecl, (xmlHashScanner) xmlSchemaTypeDump, + output); + xmlHashScanFull(schema->elemDecl, + (xmlHashScannerFull) xmlSchemaElementDump, output); +} + +/************************************************************************ + * * + * Parsing functions * + * * + ************************************************************************/ + +/** + * xmlSchemaGetType: + * @schema: the schemas context + * @name: the type name + * @ns: the type namespace + * + * Lookup a type in the schemas or the predefined types + * + * Returns 1 if the string is NULL or made of blanks chars, 0 otherwise + */ +static xmlSchemaTypePtr +xmlSchemaGetType(xmlSchemaPtr schema, const xmlChar * name, + const xmlChar * namespace) { + xmlSchemaTypePtr ret; + + if (name == NULL) + return(NULL); + if (schema != NULL) { + ret = xmlHashLookup2(schema->typeDecl, name, namespace); + if (ret != NULL) + return(ret); + } + ret = xmlSchemaGetPredefinedType(name, namespace); +#ifdef DEBUG + if (ret == NULL) { + if (namespace == NULL) + fprintf(stderr, "Unable to lookup type %s", name); + else + fprintf(stderr, "Unable to lookup type %s:%s", name, namespace); + } +#endif + return(ret); +} + +/************************************************************************ + * * + * Parsing 
functions * + * * + ************************************************************************/ + +#define IS_BLANK_NODE(n) \ + (((n)->type == XML_TEXT_NODE) && (xmlSchemaIsBlank((n)->content))) + +/** + * xmlSchemaIsBlank: + * @str: a string + * + * Check if a string is ignorable + * + * Returns 1 if the string is NULL or made of blanks chars, 0 otherwise + */ +static int +xmlSchemaIsBlank(xmlChar *str) { + if (str == NULL) + return(1); + while (*str != 0) { + if (!(IS_BLANK(*str))) return(0); + str++; + } + return(1); +} + +/** + * xmlSchemaAddNotation: + * @ctxt: a schema validation context + * @schema: the schema being built + * @name: the item name + * + * Add an XML schema Attrribute declaration + * *WARNING* this interface is highly subject to change + * + * Returns the new struture or NULL in case of error + */ +static xmlSchemaNotationPtr +xmlSchemaAddNotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + const xmlChar * name) +{ + xmlSchemaNotationPtr ret = NULL; + int val; + + if ((ctxt == NULL) || (schema == NULL) || (name == NULL)) + return (NULL); + + if (schema->notaDecl == NULL) + schema->notaDecl = xmlHashCreate(10); + if (schema->notaDecl == NULL) + return (NULL); + + ret = (xmlSchemaNotationPtr) xmlMalloc(sizeof(xmlSchemaNotation)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaNotation)); + ret->name = xmlStrdup(name); + val = xmlHashAddEntry2(schema->notaDecl, name, schema->targetNamespace, + ret); + if (val != 0) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Could not add notation %s\n", + name); + xmlFree((char *) ret->name); + xmlFree(ret); + return (NULL); + } + return (ret); +} + + +/** + * xmlSchemaAddAttribute: + * @ctxt: a schema validation context + * @schema: the schema being built + * @name: the item name + * @container: the container's name + * + * Add an XML schema 
Attrribute declaration + * *WARNING* this interface is highly subject to change + * + * Returns the new struture or NULL in case of error + */ +static xmlSchemaAttributePtr +xmlSchemaAddAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + const xmlChar * name) +{ + xmlSchemaAttributePtr ret = NULL; + int val; + + if ((ctxt == NULL) || (schema == NULL) || (name == NULL)) + return (NULL); + + if (schema->attrDecl == NULL) + schema->attrDecl = xmlHashCreate(10); + if (schema->attrDecl == NULL) + return (NULL); + + ret = (xmlSchemaAttributePtr) xmlMalloc(sizeof(xmlSchemaAttribute)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaAttribute)); + ret->name = xmlStrdup(name); + val = xmlHashAddEntry3(schema->attrDecl, name, + schema->targetNamespace, ctxt->container, ret); + if (val != 0) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Could not add attribute %s\n", + name); + xmlFree((char *) ret->name); + xmlFree(ret); + return (NULL); + } + return (ret); +} + +/** + * xmlSchemaAddAttributeGroup: + * @ctxt: a schema validation context + * @schema: the schema being built + * @name: the item name + * + * Add an XML schema Attrribute Group declaration + * + * Returns the new struture or NULL in case of error + */ +static xmlSchemaAttributeGroupPtr +xmlSchemaAddAttributeGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + const xmlChar * name) +{ + xmlSchemaAttributeGroupPtr ret = NULL; + int val; + + if ((ctxt == NULL) || (schema == NULL) || (name == NULL)) + return (NULL); + + if (schema->attrgrpDecl == NULL) + schema->attrgrpDecl = xmlHashCreate(10); + if (schema->attrgrpDecl == NULL) + return (NULL); + + ret = (xmlSchemaAttributeGroupPtr) xmlMalloc(sizeof(xmlSchemaAttributeGroup)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + 
return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaAttributeGroup)); + ret->name = xmlStrdup(name); + val = xmlHashAddEntry3(schema->attrgrpDecl, name, + schema->targetNamespace, ctxt->container, ret); + if (val != 0) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Could not add attribute group %s\n", + name); + xmlFree((char *) ret->name); + xmlFree(ret); + return (NULL); + } + return (ret); +} + +/** + * xmlSchemaAddElement: + * @ctxt: a schema validation context + * @schema: the schema being built + * @name: the type name + * @namespace: the type namespace + * + * Add an XML schema Element declaration + * *WARNING* this interface is highly subject to change + * + * Returns the new struture or NULL in case of error + */ +static xmlSchemaElementPtr +xmlSchemaAddElement(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + const xmlChar * name, const xmlChar * namespace) +{ + xmlSchemaElementPtr ret = NULL; + int val; + + if ((ctxt == NULL) || (schema == NULL) || (name == NULL)) + return (NULL); + + if (schema->elemDecl == NULL) + schema->elemDecl = xmlHashCreate(10); + if (schema->elemDecl == NULL) + return (NULL); + + ret = (xmlSchemaElementPtr) xmlMalloc(sizeof(xmlSchemaElement)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaElement)); + ret->name = xmlStrdup(name); + val = xmlHashAddEntry3(schema->elemDecl, name, + namespace, ctxt->container, ret); + if (val != 0) { + char buf[100]; + + snprintf(buf, 99, "privatieelem%d", ctxt->counter++ + 1); + val = xmlHashAddEntry3(schema->elemDecl, name, (xmlChar *) buf, + namespace, ret); + if (val != 0) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Could not add element %s\n", + name); + xmlFree((char *) ret->name); + xmlFree(ret); + return (NULL); + } + } + return (ret); +} + +/** + * xmlSchemaAddType: + * @ctxt: a schema validation 
context + * @schema: the schema being built + * @name: the item name + * + * Add an XML schema Simple Type definition + * *WARNING* this interface is highly subject to change + * + * Returns the new struture or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaAddType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + const xmlChar * name) +{ + xmlSchemaTypePtr ret = NULL; + int val; + + if ((ctxt == NULL) || (schema == NULL) || (name == NULL)) + return (NULL); + + if (schema->typeDecl == NULL) + schema->typeDecl = xmlHashCreate(10); + if (schema->typeDecl == NULL) + return (NULL); + + ret = (xmlSchemaTypePtr) xmlMalloc(sizeof(xmlSchemaType)); + if (ret == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Out of memory\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaType)); + ret->name = xmlStrdup(name); + val = xmlHashAddEntry2(schema->typeDecl, name, schema->targetNamespace, + ret); + if (val != 0) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Could not add type %s\n", name); + xmlFree((char *) ret->name); + xmlFree(ret); + return (NULL); + } + ret->minOccurs = 1; + ret->maxOccurs = 1; + + return (ret); +} + +/************************************************************************ + * * + * Utilities for parsing * + * * + ************************************************************************/ + +/** + * xmlGetQNameProp: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * @name: the attribute name + * @namespace: the result namespace if any + * + * Extract a QName Attribute value + * + * Returns the NCName or NULL if not found, and also update @namespace + * with the namespace URI + */ +static xmlChar * +xmlGetQNameProp(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, + const char *name, + xmlChar **namespace) { + xmlChar *val, *ret, *prefix; + xmlNsPtr ns; + + + if (namespace != NULL) + *namespace = NULL; + val = 
xmlGetProp(node, (const xmlChar *) name); + if (val == NULL) + return(NULL); + + ret = xmlSplitQName2(val, &prefix); + if (ret == NULL) + return(val); + xmlFree(val); + + ns = xmlSearchNs(node->doc, node, prefix); + if (ns == NULL) { + xmlSchemaErrorContext(ctxt, NULL, node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Attribute %s: the QName prefix %s is undefined\n", + name, prefix); + } else { + *namespace = xmlStrdup(ns->href); + } + xmlFree(prefix); + return(ret); +} + +/** + * xmlGetMaxOccurs: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * + * Get the maxOccurs property + * + * Returns the default if not found, or the value + */ +static int +xmlGetMaxOccurs(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) { + xmlChar *val, *cur; + int ret = 0; + + val = xmlGetProp(node, (const xmlChar *) "maxOccurs"); + if (val == NULL) + return(1); + + if (xmlStrEqual(val, (const xmlChar *) "unbounded")) { + xmlFree(val); + return(UNBOUNDED); /* encoding it with -1 might be another option */ + } + + cur = val; + while (IS_BLANK(*cur)) cur++; + while ((*cur >= '0') && (*cur <= '9')) { + ret = ret * 10 + (*cur - '0'); + cur++; + } + while (IS_BLANK(*cur)) cur++; + if (*cur != 0) { + xmlSchemaErrorContext(ctxt, NULL, node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "invalid value for minOccurs: %s\n", + val); + xmlFree(val); + return(1); + } + xmlFree(val); + return(ret); +} + +/** + * xmlGetMinOccurs: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * + * Get the minOccurs property + * + * Returns the default if not found, or the value + */ +static int +xmlGetMinOccurs(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) { + xmlChar *val, *cur; + int ret = 0; + + val = xmlGetProp(node, (const xmlChar *) "minOccurs"); + if (val == NULL) + return(1); + + cur = val; + while (IS_BLANK(*cur)) cur++; + while 
((*cur >= '0') && (*cur <= '9')) { + ret = ret * 10 + (*cur - '0'); + cur++; + } + while (IS_BLANK(*cur)) cur++; + if (*cur != 0) { + xmlSchemaErrorContext(ctxt, NULL, node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "invalid value for minOccurs: %s\n", + val); + xmlFree(val); + return(1); + } + xmlFree(val); + return(ret); +} + +/** + * xmlGetBooleanProp: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * @name: the attribute name + * @def: the default value + * + * Get is a bolean property is set + * + * Returns the default if not found, 0 if found to be false, + * 1 if found to be true + */ +static int +xmlGetBooleanProp(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node, + const char *name, int def) { + xmlChar *val; + + val = xmlGetProp(node, (const xmlChar *) name); + if (val == NULL) + return(def); + + if (xmlStrEqual(val, BAD_CAST"true")) + def = 1; + else if (xmlStrEqual(val, BAD_CAST"false")) + def = 0; + else { + xmlSchemaErrorContext(ctxt, NULL, node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Attribute %s: the value %s is not boolean\n", + name, val); + } + xmlFree(val); + return(def); +} + +/************************************************************************ + * * + * Shema extraction from an Infoset * + * * + ************************************************************************/ +static xmlSchemaTypePtr xmlSchemaParseSimpleType(xmlSchemaParserCtxtPtr + ctxt, xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseComplexType(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseRestriction(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node, + int simple); +static xmlSchemaTypePtr xmlSchemaParseSequence(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaTypePtr 
xmlSchemaParseAll(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaAttributePtr xmlSchemaParseAttribute(xmlSchemaParserCtxtPtr + ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaAttributeGroupPtr +xmlSchemaParseAttributeGroup(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseChoice(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaTypePtr xmlSchemaParseList(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaPtr schema, + xmlNodePtr node); +static xmlSchemaAttributePtr +xmlSchemaParseAnyAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node); + +/** + * xmlSchemaParseAttrDecls: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * @type: the hosting type + * + * parse a XML schema attrDecls declaration corresponding to + * + */ +static xmlNodePtr +xmlSchemaParseAttrDecls(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr child, xmlSchemaTypePtr type) +{ + xmlSchemaAttributePtr lastattr, attr; + + lastattr = NULL; + while ((IS_SCHEMA(child, "attribute")) || + (IS_SCHEMA(child, "attributeGroup"))) { + attr = NULL; + if (IS_SCHEMA(child, "attribute")) { + attr = xmlSchemaParseAttribute(ctxt, schema, child); + } else if (IS_SCHEMA(child, "attributeGroup")) { + attr = (xmlSchemaAttributePtr) + xmlSchemaParseAttributeGroup(ctxt, schema, child); + } + if (attr != NULL) { + if (lastattr == NULL) { + type->attributes = attr; + lastattr = attr + ; + } else { + lastattr->next = attr; + lastattr = attr; + } + } + child = child->next; + } + if (IS_SCHEMA(child, "anyAttribute")) { + attr = xmlSchemaParseAnyAttribute(ctxt, schema, child); + if (attr != NULL) { + if (lastattr == NULL) { + type->attributes = attr; + lastattr = attr + ; + } else { + lastattr->next = attr; + lastattr = attr; + } + } + child = child->next; + } + 
return(child); +} + +/** + * xmlSchemaParseAnnotation: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Attrribute declaration + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaAnnotPtr +xmlSchemaParseAnnotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaAnnotPtr ret; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + ret = xmlSchemaNewAnnot(ctxt, node); + + return (ret); +} + +/** + * xmlSchemaParseFacet: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Facet declaration + * *WARNING* this interface is highly subject to change + * + * Returns the new type structure or NULL in case of error + */ +static xmlSchemaFacetPtr +xmlSchemaParseFacet(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaFacetPtr facet; + xmlNodePtr child = NULL; + xmlChar *value; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + facet = xmlSchemaNewFacet(ctxt); + if (facet == NULL) + return (NULL); + facet->node = node; + value = xmlGetProp(node, (const xmlChar *) "value"); + if (value == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Facet %s has no value\n", node->name); + xmlSchemaFreeFacet(facet); + return (NULL); + } + if (IS_SCHEMA(node, "minInclusive")) { + facet->type = XML_SCHEMA_FACET_MININCLUSIVE; + } else if (IS_SCHEMA(node, "minExclusive")) { + facet->type = XML_SCHEMA_FACET_MINEXCLUSIVE; + } else if (IS_SCHEMA(node, "maxInclusive")) { + facet->type = XML_SCHEMA_FACET_MAXINCLUSIVE; + } else if (IS_SCHEMA(node, 
"maxExclusive")) { + facet->type = XML_SCHEMA_FACET_MAXEXCLUSIVE; + } else if (IS_SCHEMA(node, "totalDigits")) { + facet->type = XML_SCHEMA_FACET_TOTALDIGITS; + } else if (IS_SCHEMA(node, "fractionDigits")) { + facet->type = XML_SCHEMA_FACET_FRACTIONDIGITS; + } else if (IS_SCHEMA(node, "pattern")) { + facet->type = XML_SCHEMA_FACET_PATTERN; + } else if (IS_SCHEMA(node, "enumeration")) { + facet->type = XML_SCHEMA_FACET_ENUMERATION; + } else if (IS_SCHEMA(node, "whiteSpace")) { + facet->type = XML_SCHEMA_FACET_WHITESPACE; + } else if (IS_SCHEMA(node, "length")) { + facet->type = XML_SCHEMA_FACET_LENGTH; + } else if (IS_SCHEMA(node, "maxLength")) { + facet->type = XML_SCHEMA_FACET_MAXLENGTH; + } else if (IS_SCHEMA(node, "minLength")) { + facet->type = XML_SCHEMA_FACET_MINLENGTH; + } else { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Unknown facet type %s\n", node->name); + xmlSchemaFreeFacet(facet); + return(NULL); + } + facet->id = xmlGetProp(node, (const xmlChar *) "id"); + facet->value = value; + child = node->children; + + if (IS_SCHEMA(child, "annotation")) { + facet->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Facet %s has unexpected child content\n", + node->name); + } + return (facet); +} + +/** + * xmlSchemaParseAny: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Any declaration + * *WARNING* this interface is highly subject to change + * + * Returns the new type structure or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseAny(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type; + xmlNodePtr child = NULL; + 
xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + snprintf((char *)name, 30, "any %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_ANY; + child = node->children; + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Sequence %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseNotation: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Notation declaration + * + * Returns the new structure or NULL in case of error + */ +static xmlSchemaNotationPtr +xmlSchemaParseNotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlChar *name; + xmlSchemaNotationPtr ret; + xmlNodePtr child = NULL; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Notation has no name\n"); + return (NULL); + } + ret = xmlSchemaAddNotation(ctxt, schema, name); + if (ret == NULL) { + xmlFree(name); + return (NULL); + } + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + 
ctxt->error(ctxt->userData, + "notation %s has unexpected content\n", + name); + } + + return (ret); +} + +/** + * xmlSchemaParseAnyAttribute: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema AnyAttrribute declaration + * *WARNING* this interface is highly subject to change + * + * Returns an attribute def structure or NULL + */ +static xmlSchemaAttributePtr +xmlSchemaParseAnyAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlChar *processContents; + xmlSchemaAttributePtr ret; + xmlNodePtr child = NULL; + char name[100]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + snprintf(name, 99, "anyattr %d", ctxt->counter++ + 1); + ret = xmlSchemaAddAttribute(ctxt, schema, (xmlChar *)name); + if (ret == NULL) { + return (NULL); + } + ret->id = xmlGetProp(node, (const xmlChar *) "id"); + processContents = xmlGetProp(node, (const xmlChar *) "processContents"); + if ((processContents == NULL) || + (xmlStrEqual(processContents, (const xmlChar *)"strict"))) { + ret->occurs = XML_SCHEMAS_ANYATTR_STRICT; + } else if (xmlStrEqual(processContents, (const xmlChar *)"skip")) { + ret->occurs = XML_SCHEMAS_ANYATTR_SKIP; + } else if (xmlStrEqual(processContents, (const xmlChar *)"lax")) { + ret->occurs = XML_SCHEMAS_ANYATTR_LAX; + } else { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "anyAttribute has unexpected content for processContents: %s\n", + processContents); + ret->occurs = XML_SCHEMAS_ANYATTR_STRICT; + } + if (processContents != NULL) + xmlFree(processContents); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && 
(ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "anyAttribute %s has unexpected content\n", + name); + } + + return (ret); +} + + +/** + * xmlSchemaParseAttribute: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Attrribute declaration + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaAttributePtr +xmlSchemaParseAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlChar *name, *refNs = NULL, *ref = NULL; + xmlSchemaAttributePtr ret; + xmlNodePtr child = NULL; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + ref = xmlGetQNameProp(ctxt, node, "ref", &refNs); + if (ref == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Attribute has no name nor ref\n"); + return (NULL); + } + snprintf(buf, 99, "anonattr%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + ret = xmlSchemaAddAttribute(ctxt, schema, name); + if (ret == NULL) { + xmlFree(name); + if (ref != NULL) + xmlFree(ref); + return (NULL); + } + xmlFree(name); + ret->ref = ref; + ret->refNs = refNs; + ret->typeName = xmlGetQNameProp(ctxt, node, "type", &(ret->typeNs)); + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (IS_SCHEMA(child, "simpleType")) { + ret->base = xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "attribute %s has 
unexpected content\n", + name); + } + + return (ret); +} + +/** + * xmlSchemaParseAttributeGroup: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Attribute Group declaration + * *WARNING* this interface is highly subject to change + * + * Returns the attribute group or NULL in case of error. + */ +static xmlSchemaAttributeGroupPtr +xmlSchemaParseAttributeGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlChar *name, *refNs = NULL, *ref = NULL; + xmlSchemaAttributeGroupPtr ret; + xmlSchemaAttributePtr last = NULL, attr; + xmlNodePtr child = NULL; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + oldcontainer = ctxt->container; + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + ref = xmlGetQNameProp(ctxt, node, "ref", &refNs); + if (ref == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "AttributeGroup has no name nor ref\n"); + return (NULL); + } + snprintf(buf, 99, "anonattrgroup%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + ret = xmlSchemaAddAttributeGroup(ctxt, schema, name); + if (ret == NULL) { + xmlFree(name); + if (ref != NULL) + xmlFree(ref); + return (NULL); + } + ret->ref = ref; + ret->refNs = refNs; + child = node->children; + ctxt->container = name; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while ((IS_SCHEMA(child, "attribute")) || + (IS_SCHEMA(child, "attributeGroup"))) { + attr = NULL; + if (IS_SCHEMA(child, "attribute")) { + attr = xmlSchemaParseAttribute(ctxt, schema, child); + } else if (IS_SCHEMA(child, "attributeGroup")) { + attr = (xmlSchemaAttributePtr) + xmlSchemaParseAttributeGroup(ctxt, schema, child); + 
} + if (attr != NULL) { + if (last == NULL) { + ret->attributes = attr; + last = attr; + } else { + last->next = attr; + last = attr; + } + } + child = child->next; + } + if (IS_SCHEMA(child, "anyAttribute")) { + TODO + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "attribute group %s has unexpected content\n", + name); + } + + ctxt->container = oldcontainer; + return (ret); +} + +/** + * xmlSchemaParseElement: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Element declaration + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaElementPtr +xmlSchemaParseElement(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node, int toplevel) +{ + xmlChar *name, *refNs = NULL, *ref = NULL, *namespace, *fixed; + xmlSchemaElementPtr ret; + xmlNodePtr child = NULL; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + oldcontainer = ctxt->container; + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + ref = xmlGetQNameProp(ctxt, node, "ref", &refNs); + if (ref == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Element has no name nor ref\n"); + return (NULL); + } + snprintf(buf, 99, "anonelem%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + namespace = xmlGetProp(node, (const xmlChar *) "targetNamespace"); + if (namespace == NULL) + ret = + xmlSchemaAddElement(ctxt, schema, name, + schema->targetNamespace); + else + ret = xmlSchemaAddElement(ctxt, schema, name, namespace); + if (namespace != NULL) + 
xmlFree(namespace); + if (ret == NULL) { + xmlFree(name); + if (ref != NULL) + xmlFree(ref); + return (NULL); + } + ret->type = XML_SCHEMA_TYPE_ELEMENT; + ret->ref = ref; + ret->refNs = refNs; + if (ref != NULL) + ret->flags |= XML_SCHEMAS_ELEM_REF; + if (toplevel) + ret->flags |= XML_SCHEMAS_ELEM_TOPLEVEL; + if (xmlGetBooleanProp(ctxt, node, "nillable", 0)) + ret->flags |= XML_SCHEMAS_ELEM_NILLABLE; + if (xmlGetBooleanProp(ctxt, node, "abstract", 0)) + ret->flags |= XML_SCHEMAS_ELEM_NILLABLE; + ctxt->container = name; + + ret->id = xmlGetProp(node, BAD_CAST "id"); + ret->namedType = xmlGetQNameProp(ctxt, node, "type", &(ret->namedTypeNs)); + ret->substGroup = xmlGetQNameProp(ctxt, node, "substitutionGroup", + &(ret->substGroupNs)); + fixed = xmlGetProp(node, BAD_CAST "fixed"); + ret->minOccurs = xmlGetMinOccurs(ctxt, node); + ret->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + ret->value = xmlGetProp(node, BAD_CAST "default"); + if ((ret->value != NULL) && (fixed != NULL)) { + xmlSchemaErrorContext(ctxt, schema, node, child); + ctxt->error(ctxt->userData, + "Element %s has both default and fixed\n", + ret->name); + xmlFree(fixed); + } else if (fixed != NULL) { + ret->flags |= XML_SCHEMAS_ELEM_FIXED; + ret->value = fixed; + } + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (IS_SCHEMA(child, "complexType")) { + ret->subtypes = xmlSchemaParseComplexType(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "simpleType")) { + ret->subtypes = xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + } + while ((IS_SCHEMA(child, "unique")) || + (IS_SCHEMA(child, "key")) || + (IS_SCHEMA(child, "keyref"))) { + TODO + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "element %s has unexpected 
content\n", + name); + } + + ctxt->container = oldcontainer; + xmlFree(name); + return (ret); +} + +/** + * xmlSchemaParseUnion: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Union definition + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseUnion(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype, last = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "union %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_LIST; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->ref = xmlGetProp(node, BAD_CAST "memberTypes"); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while (IS_SCHEMA(child, "simpleType")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseSimpleType(ctxt, schema, child); + if (subtype != NULL) { + if (last == NULL) { + type->subtypes = subtype; + last = subtype; + } else { + last->next = subtype; + last = subtype; + } + last->next = NULL; + } + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Union %s has unexpected content\n", + type->name); + } + return (type); +} + +/** + * xmlSchemaParseList: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema List definition + * *WARNING* 
this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseList(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + snprintf((char *)name, 30, "list %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_LIST; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->ref = xmlGetQNameProp(ctxt, node, "ref", &(type->refNs)); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "simpleType")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "List %s has unexpected content\n", + type->name); + } + return (type); +} +/** + * xmlSchemaParseSimpleType: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Simple Type definition + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. 
+ */ +static xmlSchemaTypePtr +xmlSchemaParseSimpleType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar *name; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + snprintf(buf, 99, "simpletype%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + if (name == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "simpleType has no name\n"); + return (NULL); + } + type = xmlSchemaAddType(ctxt, schema, name); + xmlFree(name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_SIMPLE; + type->id = xmlGetProp(node, BAD_CAST "id"); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "restriction")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseRestriction(ctxt, schema, child, 1); + child = child->next; + } else if (IS_SCHEMA(child, "list")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseList(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "union")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseUnion(ctxt, schema, child); + child = child->next; + } + type->subtypes = subtype; + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "SimpleType %s has unexpected content\n", + type->name); + } + + return (type); +} + + +/** + * xmlSchemaParseGroup: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Group definition + * *WARNING* this interface 
is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar *name, *ref = NULL, *refNs = NULL; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + ref = xmlGetQNameProp(ctxt, node, "ref", &refNs); + if (ref == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "Group has no name nor ref\n"); + return (NULL); + } + snprintf(buf, 99, "anongroup%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_GROUP; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->ref = ref; + type->refNs = refNs; + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "all")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseAll(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "sequence")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseSequence(ctxt, schema, child); + child = child->next; + } + if (subtype != NULL) + type->subtypes = subtype; + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + 
"Group %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseAll: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema All definition + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseAll(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype, last = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "all%d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_SEQUENCE; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while (IS_SCHEMA(child, "element")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseElement(ctxt, schema, child, 0); + if (subtype != NULL) { + if (last == NULL) { + type->subtypes = subtype; + last = subtype; + } else { + last->next = subtype; + last = subtype; + } + last->next = NULL; + } + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "All %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseChoice: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Choice 
definition + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseChoice(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype, last = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "choice %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_CHOICE; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while ((IS_SCHEMA(child, "element")) || + (IS_SCHEMA(child, "group")) || + (IS_SCHEMA(child, "any")) || + (IS_SCHEMA(child, "choice")) || + (IS_SCHEMA(child, "sequence"))) { + subtype = NULL; + if (IS_SCHEMA(child, "element")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseElement(ctxt, schema, child, 0); + } else if (IS_SCHEMA(child, "group")) { + subtype = xmlSchemaParseGroup(ctxt, schema, child); + } else if (IS_SCHEMA(child, "any")) { + subtype = xmlSchemaParseAny(ctxt, schema, child); + } else if (IS_SCHEMA(child, "sequence")) { + subtype = xmlSchemaParseSequence(ctxt, schema, child); + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + } + if (subtype != NULL) { + if (last == NULL) { + type->subtypes = subtype; + last = subtype; + } else { + last->next = subtype; + last = subtype; + } + last->next = NULL; + } + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != 
NULL)) + ctxt->error(ctxt->userData, + "Choice %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseSequence: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Sequence definition + * *WARNING* this interface is highly subject to change + * + * Returns -1 in case of error, 0 if the declaration is inproper and + * 1 in case of success. + */ +static xmlSchemaTypePtr +xmlSchemaParseSequence(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype, last = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "sequence %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_SEQUENCE; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->minOccurs = xmlGetMinOccurs(ctxt, node); + type->maxOccurs = xmlGetMaxOccurs(ctxt, node); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + while ((IS_SCHEMA(child, "element")) || + (IS_SCHEMA(child, "group")) || + (IS_SCHEMA(child, "any")) || + (IS_SCHEMA(child, "choice")) || + (IS_SCHEMA(child, "sequence"))) { + subtype = NULL; + if (IS_SCHEMA(child, "element")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseElement(ctxt, schema, child, 0); + } else if (IS_SCHEMA(child, "group")) { + subtype = xmlSchemaParseGroup(ctxt, schema, child); + } else if (IS_SCHEMA(child, "any")) { + subtype = xmlSchemaParseAny(ctxt, schema, child); + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + } else if (IS_SCHEMA(child, "sequence")) { + subtype = xmlSchemaParseSequence(ctxt, schema, 
child); + } + if (subtype != NULL) { + if (last == NULL) { + type->subtypes = subtype; + last = subtype; + } else { + last->next = subtype; + last = subtype; + } + last->next = NULL; + } + child = child->next; + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Sequence %s has unexpected content\n", + type->name); + } + + return (type); +} + +/** + * xmlSchemaParseRestriction: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * @simple: is that part of a simple type. + * + * parse a XML schema Restriction definition + * *WARNING* this interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseRestriction(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node, int simple) +{ + xmlSchemaTypePtr type, subtype; + xmlSchemaFacetPtr facet, lastfacet = NULL; + xmlNodePtr child = NULL; + xmlChar name[30]; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + oldcontainer = ctxt->container; + + snprintf((char *)name, 30, "restriction %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_RESTRICTION; + type->id = xmlGetProp(node, BAD_CAST "id"); + type->base = xmlGetQNameProp(ctxt, node, "base", &(type->baseNs)); + if ((!simple) && (type->base == NULL)) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Restriction %s has no base\n", + type->name); + } + ctxt->container = name; + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; 
+ + if (IS_SCHEMA(child, "all")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseAll(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } else if (IS_SCHEMA(child, "sequence")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseSequence(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } else if (IS_SCHEMA(child, "group")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseGroup(ctxt, schema, child); + child = child->next; + type->subtypes = subtype; + } else { + if (IS_SCHEMA(child, "simpleType")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + type->baseType = subtype; + } + /* + * Facets + */ + while ((IS_SCHEMA(child, "minInclusive")) || + (IS_SCHEMA(child, "minExclusive")) || + (IS_SCHEMA(child, "maxInclusive")) || + (IS_SCHEMA(child, "maxExclusive")) || + (IS_SCHEMA(child, "totalDigits")) || + (IS_SCHEMA(child, "fractionDigits")) || + (IS_SCHEMA(child, "pattern")) || + (IS_SCHEMA(child, "enumeration")) || + (IS_SCHEMA(child, "whiteSpace")) || + (IS_SCHEMA(child, "length")) || + (IS_SCHEMA(child, "maxLength")) || + (IS_SCHEMA(child, "minLength"))) { + facet = xmlSchemaParseFacet(ctxt, schema, child); + if (facet != NULL) { + if (lastfacet == NULL) { + type->facets = facet; + lastfacet = facet; + } else { + lastfacet->next = facet; + lastfacet = facet; + } + lastfacet->next = NULL; + } + child = child->next; + } + } + child = xmlSchemaParseAttrDecls(ctxt, schema, child, type); + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Restriction %s has unexpected content\n", + type->name); + } + ctxt->container = oldcontainer; + return (type); +} + +/** + * xmlSchemaParseExtension: + * @ctxt: a schema validation 
context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Extension definition + * *WARNING* this interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseExtension(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar name[30]; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + oldcontainer = ctxt->container; + + snprintf((char *)name, 30, "extension %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_EXTENSION; + type->id = xmlGetProp(node, BAD_CAST "id"); + ctxt->container = name; + + type->base = xmlGetQNameProp(ctxt, node, "base", &(type->baseNs)); + if (type->base == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Extension %s has no base\n", + type->name); + } + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + + if (IS_SCHEMA(child, "all")) { + subtype = xmlSchemaParseAll(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "sequence")) { + subtype = xmlSchemaParseSequence(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "group")) { + subtype = xmlSchemaParseGroup(ctxt, schema, child); + child = child->next; + } + if (subtype != NULL) + type->subtypes = subtype; + child = xmlSchemaParseAttrDecls(ctxt, schema, child, type); + if (child != NULL) { + xmlSchemaErrorContext(ctxt, 
schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Extension %s has unexpected content\n", + type->name); + } + ctxt->container = oldcontainer; + return (type); +} + +/** + * xmlSchemaParseSimpleContent: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema SimpleContent definition + * *WARNING* this interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseSimpleContent(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "complexContent %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_SIMPLE_CONTENT; + type->id = xmlGetProp(node, BAD_CAST "id"); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "restriction")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseRestriction(ctxt, schema, child, 0); + child = child->next; + } else if (IS_SCHEMA(child, "extension")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseExtension(ctxt, schema, child); + child = child->next; + } + type->subtypes = subtype; + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "SimpleContent %s has unexpected content\n", + type->name); + } + return (type); +} + +/** + * xmlSchemaParseComplexContent: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree 
containing XML Schema informations + * + * parse a XML schema ComplexContent definition + * *WARNING* this interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseComplexContent(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar name[30]; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + + snprintf((char *)name, 30, "complexContent %d", ctxt->counter++ + 1); + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) + return (NULL); + type->node = node; + type->type = XML_SCHEMA_TYPE_COMPLEX_CONTENT; + type->id = xmlGetProp(node, BAD_CAST "id"); + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + subtype = NULL; + if (IS_SCHEMA(child, "restriction")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseRestriction(ctxt, schema, child, 0); + child = child->next; + } else if (IS_SCHEMA(child, "extension")) { + subtype = (xmlSchemaTypePtr) + xmlSchemaParseExtension(ctxt, schema, child); + child = child->next; + } + type->subtypes = subtype; + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "ComplexContent %s has unexpected content\n", + type->name); + } + return (type); +} + +/** + * xmlSchemaParseComplexType: + * @ctxt: a schema validation context + * @schema: the schema being built + * @node: a subtree containing XML Schema informations + * + * parse a XML schema Complex Type definition + * *WARNING* this interface is highly subject to change + * + * Returns the type definition or NULL in case of error + */ +static xmlSchemaTypePtr +xmlSchemaParseComplexType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema, + xmlNodePtr node) +{ + 
xmlSchemaTypePtr type, subtype; + xmlNodePtr child = NULL; + xmlChar *name; + xmlChar *oldcontainer; + + if ((ctxt == NULL) || (schema == NULL) || (node == NULL)) + return (NULL); + + oldcontainer = ctxt->container; + name = xmlGetProp(node, (const xmlChar *) "name"); + if (name == NULL) { + char buf[100]; + + snprintf(buf, 99, "anontype%d", ctxt->counter++ + 1); + name = xmlStrdup((xmlChar *) buf); + } + if (name == NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, "complexType has no name\n"); + return (NULL); + } + type = xmlSchemaAddType(ctxt, schema, name); + if (type == NULL) { + xmlFree(name); + return (NULL); + } + type->node = node; + type->type = XML_SCHEMA_TYPE_COMPLEX; + type->id = xmlGetProp(node, BAD_CAST "id"); + ctxt->container = name; + + child = node->children; + if (IS_SCHEMA(child, "annotation")) { + type->annot = xmlSchemaParseAnnotation(ctxt, schema, child); + child = child->next; + } + if (IS_SCHEMA(child, "simpleContent")) { + subtype = xmlSchemaParseSimpleContent(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "complexContent")) { + type->subtypes = xmlSchemaParseComplexContent(ctxt, schema, child); + child = child->next; + } else { + subtype = NULL; + + if (IS_SCHEMA(child, "all")) { + subtype = xmlSchemaParseAll(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "choice")) { + subtype = xmlSchemaParseChoice(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "sequence")) { + subtype = xmlSchemaParseSequence(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "group")) { + subtype = xmlSchemaParseGroup(ctxt, schema, child); + child = child->next; + } + if (subtype != NULL) + type->subtypes = subtype; + child = xmlSchemaParseAttrDecls(ctxt, schema, child, type); + } + if (child != NULL) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) 
&& (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "ComplexType %s has unexpected content\n", + type->name); + } + ctxt->container = oldcontainer; + xmlFree(name); + return (type); +} + + +/** + * xmlSchemaParseSchema: + * @ctxt: a schema validation context + * @node: a subtree containing XML Schema informations + * + * parse a XML schema definition from a node set + * *WARNING* this interface is highly subject to change + * + * Returns the internal XML Schema structure built from the resource or + * NULL in case of error + */ +static xmlSchemaPtr +xmlSchemaParseSchema(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) +{ + xmlSchemaPtr schema = NULL; + xmlSchemaAnnotPtr annot; + xmlNodePtr child = NULL; + xmlChar *val; + + if ((ctxt == NULL) || (node == NULL)) + return (NULL); + + if (IS_SCHEMA(node, "schema")) { + schema = xmlSchemaNewSchema(ctxt); + if (schema == NULL) + return(NULL); + schema->targetNamespace = xmlGetProp(node, BAD_CAST "targetNamespace"); + schema->id = xmlGetProp(node, BAD_CAST "id"); + schema->version = xmlGetProp(node, BAD_CAST "version"); + val = xmlGetProp(node, BAD_CAST "elementFormDefault"); + if (val != NULL) { + if (xmlStrEqual(val, BAD_CAST "qualified")) + schema->flags |= XML_SCHEMAS_QUALIF_ELEM; + else if (!xmlStrEqual(val, BAD_CAST "unqualified")) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) { + ctxt->error(ctxt->userData, + "Invalid value %s for elementFormDefault\n", + val); + } + } + xmlFree(val); + } + val = xmlGetProp(node, BAD_CAST "attributeFormDefault"); + if (val != NULL) { + if (xmlStrEqual(val, BAD_CAST "qualified")) + schema->flags |= XML_SCHEMAS_QUALIF_ATTR; + else if (!xmlStrEqual(val, BAD_CAST "unqualified")) { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) { + ctxt->error(ctxt->userData, + "Invalid value %s for elementFormDefault\n", + val); + } + } + xmlFree(val); + } + + child = node->children; + while 
((IS_SCHEMA(child, "include")) || + (IS_SCHEMA(child, "import")) || + (IS_SCHEMA(child, "redefine")) || + (IS_SCHEMA(child, "annotation"))) { + if (IS_SCHEMA(child, "annotation")) { + annot = xmlSchemaParseAnnotation(ctxt, schema, child); + if (schema->annot == NULL) + schema->annot = annot; + else + xmlSchemaFreeAnnot(annot); + } else if (IS_SCHEMA(child, "include")) { + TODO + } else if (IS_SCHEMA(child, "import")) { + TODO + } else if (IS_SCHEMA(child, "redefine")) { + TODO + } + child = child->next; + } + while (child != NULL) { + if (IS_SCHEMA(child, "complexType")) { + xmlSchemaParseComplexType(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "simpleType")) { + xmlSchemaParseSimpleType(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "element")) { + xmlSchemaParseElement(ctxt, schema, child, 1); + child = child->next; + } else if (IS_SCHEMA(child, "attribute")) { + xmlSchemaParseAttribute(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "attributeGroup")) { + xmlSchemaParseAttributeGroup(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "group")) { + xmlSchemaParseGroup(ctxt, schema, child); + child = child->next; + } else if (IS_SCHEMA(child, "notation")) { + xmlSchemaParseNotation(ctxt, schema, child); + child = child->next; + } else { + xmlSchemaErrorContext(ctxt, schema, node, child); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: unexpected element %s here \n", + child->name); + child = child->next; + } + while (IS_SCHEMA(child, "annotation")) { + annot = xmlSchemaParseAnnotation(ctxt, schema, child); + if (schema->annot == NULL) + schema->annot = annot; + else + xmlSchemaFreeAnnot(annot); + child = child->next; + } + } + } +#ifdef DEBUG + if (schema == NULL) + xmlGenericError(xmlGenericErrorContext, + "xmlSchemaParse() failed\n"); +#endif + + return (schema); +} + 
+/************************************************************************ + * * + * Validating using Schemas * + * * + ************************************************************************/ + +/************************************************************************ + * * + * Reading/Writing Schemas * + * * + ************************************************************************/ + +/** + * xmlSchemaNewParserCtxt: + * @URL: the location of the schema + * + * Create an XML Schemas parse context for that file/resource expected + * to contain an XML Schemas file. + * + * Returns the parser context or NULL in case of error + */ +xmlSchemaParserCtxtPtr +xmlSchemaNewParserCtxt(const char *URL) { + xmlSchemaParserCtxtPtr ret; + + if (URL == NULL) + return(NULL); + + ret = (xmlSchemaParserCtxtPtr) xmlMalloc(sizeof(xmlSchemaParserCtxt)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Failed to allocate new schama parser context for %s\n", URL); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaParserCtxt)); + ret->URL = xmlStrdup((const xmlChar *)URL); + return (ret); +} + +/** + * xmlSchemaFreeParserCtxt: + * @ctxt: the schema parser context + * + * Free the resources associated to the schema parser context + */ +void +xmlSchemaFreeParserCtxt(xmlSchemaParserCtxtPtr ctxt) { + if (ctxt == NULL) + return; + if (ctxt->URL != NULL) + xmlFree(ctxt->URL); + xmlFree(ctxt); +} + +/************************************************************************ + * * + * Building the content models * + * * + ************************************************************************/ +/** + * xmlSchemaBuildAContentModel: + * @type: the schema type definition + * @ctxt: the schema parser context + * @name: the element name whose content is being built + * + * Generate the automata sequence needed for that type + */ +static void +xmlSchemaBuildAContentModel(xmlSchemaTypePtr type, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) { + if (type == NULL) { + 
xmlGenericError(xmlGenericErrorContext, + "Found unexpected type = NULL in %s content model\n", + name); + return; + } + switch (type->type) { + case XML_SCHEMA_TYPE_ANY: + /* TODO : handle the namespace too */ + /* TODO : make that a specific transition type */ + TODO + ctxt->state = xmlAutomataNewTransition(ctxt->am, ctxt->state, + NULL, BAD_CAST "*", NULL); + break; + case XML_SCHEMA_TYPE_ELEMENT: { + xmlSchemaElementPtr elem = (xmlSchemaElementPtr) type; + /* TODO : handle the namespace too */ + xmlAutomataStatePtr oldstate = ctxt->state; + if (elem->maxOccurs >= UNBOUNDED) { + if (elem->refDecl != NULL) { + xmlSchemaBuildAContentModel( + (xmlSchemaTypePtr) elem->refDecl, + ctxt, elem->refDecl->name); + } else { + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, elem->name, type); + } + xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate); + if (elem->minOccurs == 0) { + /* basically an elem* */ + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + } + } else if (elem->maxOccurs > 1) { + if (elem->refDecl != NULL) { + TODO + xmlSchemaBuildAContentModel( + (xmlSchemaTypePtr) elem->refDecl, + ctxt, elem->refDecl->name); + } else { + ctxt->state = xmlAutomataNewCountTrans(ctxt->am, + ctxt->state, NULL, elem->name, + elem->minOccurs, elem->maxOccurs, type); + } + } else { + if (elem->refDecl != NULL) { + xmlSchemaBuildAContentModel( + (xmlSchemaTypePtr) elem->refDecl, + ctxt, elem->refDecl->name); + } else { + ctxt->state = xmlAutomataNewTransition(ctxt->am, + ctxt->state, NULL, elem->name, type); + } + if (elem->minOccurs == 0) { + /* basically an elem? 
*/ + xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state); + } + } + break; + } + case XML_SCHEMA_TYPE_SEQUENCE: { + xmlSchemaTypePtr subtypes; + + /* + * Simply iterate over the subtypes + */ + subtypes = type->subtypes; + while (subtypes != NULL) { + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + subtypes = subtypes->next; + } + break; + } + case XML_SCHEMA_TYPE_CHOICE: { + xmlSchemaTypePtr subtypes; + xmlAutomataStatePtr start, end; + + start = ctxt->state; + end = xmlAutomataNewState(ctxt->am); + + /* + * iterate over the subtypes and remerge the end with an + * epsilon transition + */ + subtypes = type->subtypes; + while (subtypes != NULL) { + ctxt->state = start; + xmlSchemaBuildAContentModel(subtypes, ctxt, name); + xmlAutomataNewEpsilon(ctxt->am, ctxt->state, end); + subtypes = subtypes->next; + } + ctxt->state = end; + break; + } + case XML_SCHEMA_TYPE_ALL: { + TODO + break; + } + case XML_SCHEMA_TYPE_RESTRICTION: + case XML_SCHEMA_TYPE_EXTENSION: + case XML_SCHEMA_TYPE_GROUP: + case XML_SCHEMA_TYPE_COMPLEX: + case XML_SCHEMA_TYPE_COMPLEX_CONTENT: + if (type->subtypes != NULL) + xmlSchemaBuildAContentModel(type->subtypes, ctxt, name); + break; + default: + xmlGenericError(xmlGenericErrorContext, + "Found unexpected type %d in %s content model\n", + type->type, name); + return; + } +} +/** + * xmlSchemaBuildContentModel: + * @typeDecl: the schema type definition + * @ctxt: the schema parser context + * + * Fixes the content model of the element. 
+ */ +static void +xmlSchemaBuildContentModel(xmlSchemaElementPtr elem, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) { + xmlAutomataStatePtr start; + +#ifdef DEBUG_CONTENT + xmlGenericError(xmlGenericErrorContext, + "Building content model for %s\n", name); +#endif + + if (elem->contModel != NULL) + return; + if (elem->subtypes == NULL) + return; + if (elem->subtypes->type != XML_SCHEMA_TYPE_COMPLEX) + return; + ctxt->am = xmlNewAutomata(); + if (ctxt->am == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Cannot create automata for elem %s\n", name); + return; + } + start = ctxt->state = xmlAutomataGetInitState(ctxt->am); + xmlSchemaBuildAContentModel(elem->subtypes, ctxt, name); + xmlAutomataSetFinalState(ctxt->am, ctxt->state); + elem->contModel = xmlAutomataCompile(ctxt->am); +#ifdef DEBUG_CONTENT + printf("Content model of %s:\n", name); + xmlRegexpPrint(stdout, elem->contModel); +#endif + ctxt->state = NULL; + xmlFreeAutomata(ctxt->am); + ctxt->am = NULL; +} + +/** + * xmlSchemaRefFixupCallback: + * @elem: the schema element context + * @ctxt: the schema parser context + * + * Free the resources associated to the schema parser context + */ +static void +xmlSchemaRefFixupCallback(xmlSchemaElementPtr elem, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name, + ATTRIBUTE_UNUSED const xmlChar *context, + ATTRIBUTE_UNUSED const xmlChar *namespace) +{ + if ((ctxt == NULL) || (elem == NULL)) + return; + if (elem->ref != NULL) { + xmlSchemaElementPtr elemDecl; + + if (elem->subtypes != NULL) { + xmlSchemaErrorContext(ctxt, NULL, elem->node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: element %s have both ref and subtype\n", + name); + return; + } + elemDecl = xmlHashLookup2(ctxt->schema->elemDecl, + elem->ref, elem->refNs); + + if (elemDecl == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: element %s ref to %s not found\n", + name, elem->ref); + return; 
+ } + elem->refDecl = elemDecl; + } else if (elem->namedType != NULL) { + xmlSchemaTypePtr typeDecl; + + if (elem->subtypes != NULL) { + xmlSchemaErrorContext(ctxt, NULL, elem->node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: element %s have both type and subtype\n", + name); + return; + } + typeDecl = xmlSchemaGetType(ctxt->schema, elem->namedType, + elem->namedTypeNs); + + if (typeDecl == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: element %s type %s not found\n", + name, elem->namedType); + return; + } + elem->subtypes = typeDecl; + } +} + +/** + * xmlSchemaTypeFixup: + * @typeDecl: the schema type definition + * @ctxt: the schema parser context + * + * Fixes the content model of the type. + */ +static void +xmlSchemaTypeFixup(xmlSchemaTypePtr typeDecl, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) +{ + if (name == NULL) + name = typeDecl->name; + if (typeDecl->contentType == XML_SCHEMA_CONTENT_UNKNOWN) { + switch (typeDecl->type) { + case XML_SCHEMA_TYPE_SIMPLE_CONTENT: { + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + typeDecl->contentType = typeDecl->subtypes->contentType; + break; + } + case XML_SCHEMA_TYPE_RESTRICTION: { + if (typeDecl->subtypes != NULL) + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + + if (typeDecl->base != NULL) { + xmlSchemaTypePtr baseType; + + baseType = xmlSchemaGetType(ctxt->schema, typeDecl->base, + typeDecl->baseNs); + if (baseType == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: type %s base type %s not found\n", + name, typeDecl->base); + } + typeDecl->baseType = baseType; + } + if (typeDecl->subtypes == NULL) + /* 1.1.1 */ + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + else if ((typeDecl->subtypes->subtypes == NULL) && + ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_ALL) || + (typeDecl->subtypes->type == XML_SCHEMA_TYPE_SEQUENCE))) + /* 1.1.2 
*/ + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + else if ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_CHOICE) && + (typeDecl->subtypes->subtypes == NULL)) + /* 1.1.3 */ + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + else { + /* 1.2 and 2.X are applied at the other layer */ + typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS; + } + break; + } + case XML_SCHEMA_TYPE_EXTENSION: { + xmlSchemaContentType explicitContentType; + xmlSchemaTypePtr base; + + if (typeDecl->base != NULL) { + xmlSchemaTypePtr baseType; + + baseType = xmlSchemaGetType(ctxt->schema, typeDecl->base, + typeDecl->baseNs); + if (baseType == NULL) { + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: type %s base type %s not found\n", + name, typeDecl->base); + } + typeDecl->baseType = baseType; + } + if (typeDecl->subtypes != NULL) + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + + if (typeDecl->subtypes == NULL) + /* 1.1.1 */ + explicitContentType = XML_SCHEMA_CONTENT_EMPTY; + else if ((typeDecl->subtypes->subtypes == NULL) && + ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_ALL) || + (typeDecl->subtypes->type == XML_SCHEMA_TYPE_SEQUENCE))) + /* 1.1.2 */ + explicitContentType = XML_SCHEMA_CONTENT_EMPTY; + else if ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_CHOICE) && + (typeDecl->subtypes->subtypes == NULL)) + /* 1.1.3 */ + explicitContentType = XML_SCHEMA_CONTENT_EMPTY; + + base = xmlSchemaGetType(ctxt->schema, typeDecl->base, + typeDecl->baseNs); + if (base == NULL) { + xmlSchemaErrorContext(ctxt, NULL, typeDecl->node, NULL); + if ((ctxt != NULL) && (ctxt->error != NULL)) + ctxt->error(ctxt->userData, + "Schemas: base type %s of type %s not found\n", + typeDecl->base, name); + return; + } + xmlSchemaTypeFixup(base, ctxt, NULL); + if (explicitContentType == XML_SCHEMA_CONTENT_EMPTY) { + /* 2.1 */ + typeDecl->contentType = base->contentType; + } else if (base->contentType == XML_SCHEMA_CONTENT_EMPTY) { + /* 2.2 imbitable ! 
*/ + typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS; + } else { + /* 2.3 imbitable pareil ! */ + typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS; + } + break; + } + case XML_SCHEMA_TYPE_COMPLEX: { + if (typeDecl->subtypes == NULL) { + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + } else { + if (typeDecl->flags & XML_SCHEMAS_TYPE_MIXED) + typeDecl->contentType = XML_SCHEMA_CONTENT_MIXED; + else { + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + typeDecl->contentType = typeDecl->subtypes->contentType; + } + } + break; + } + case XML_SCHEMA_TYPE_COMPLEX_CONTENT: { + if (typeDecl->subtypes == NULL) { + typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY; + } else { + if (typeDecl->flags & XML_SCHEMAS_TYPE_MIXED) + typeDecl->contentType = XML_SCHEMA_CONTENT_MIXED; + else { + xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL); + typeDecl->contentType = typeDecl->subtypes->contentType; + } + } + break; + } + case XML_SCHEMA_TYPE_SEQUENCE: + case XML_SCHEMA_TYPE_GROUP: + case XML_SCHEMA_TYPE_ALL: + case XML_SCHEMA_TYPE_CHOICE: + typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS; + break; + case XML_SCHEMA_TYPE_BASIC: + case XML_SCHEMA_TYPE_ANY: + case XML_SCHEMA_TYPE_FACET: + case XML_SCHEMA_TYPE_SIMPLE: + case XML_SCHEMA_TYPE_UR: + case XML_SCHEMA_TYPE_ELEMENT: + case XML_SCHEMA_TYPE_ATTRIBUTE: + case XML_SCHEMA_TYPE_NOTATION: + case XML_SCHEMA_TYPE_LIST: + case XML_SCHEMA_TYPE_UNION: + case XML_SCHEMA_FACET_MININCLUSIVE: + case XML_SCHEMA_FACET_MINEXCLUSIVE: + case XML_SCHEMA_FACET_MAXINCLUSIVE: + case XML_SCHEMA_FACET_MAXEXCLUSIVE: + case XML_SCHEMA_FACET_TOTALDIGITS: + case XML_SCHEMA_FACET_FRACTIONDIGITS: + case XML_SCHEMA_FACET_PATTERN: + case XML_SCHEMA_FACET_ENUMERATION: + case XML_SCHEMA_FACET_WHITESPACE: + case XML_SCHEMA_FACET_LENGTH: + case XML_SCHEMA_FACET_MAXLENGTH: + case XML_SCHEMA_FACET_MINLENGTH: + typeDecl->contentType = XML_SCHEMA_CONTENT_SIMPLE; + break; + } + } +} + +/** + * xmlSchemaCheckDefaults: + * @typeDecl: the schema type 
definition + * @ctxt: the schema parser context + * + * Checks the default values types, especially for facets + */ +static void +xmlSchemaCheckDefaults(xmlSchemaTypePtr typeDecl, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name) +{ + static xmlSchemaTypePtr nonNegativeIntegerType = NULL; + if (name == NULL) + name = typeDecl->name; + if (nonNegativeIntegerType == NULL) { + nonNegativeIntegerType = xmlSchemaGetPredefinedType( + BAD_CAST "nonNegativeInteger", xmlSchemaNs); + } + if (typeDecl->type == XML_SCHEMA_TYPE_RESTRICTION) { + if (typeDecl->facets != NULL) { + xmlSchemaFacetPtr facet = typeDecl->facets; + while (facet != NULL) { + switch (facet->type) { + case XML_SCHEMA_FACET_MININCLUSIVE: + case XML_SCHEMA_FACET_MINEXCLUSIVE: + case XML_SCHEMA_FACET_MAXINCLUSIVE: + case XML_SCHEMA_FACET_MAXEXCLUSIVE: { + /* + * Okay we need to validate the value + * at that point. + */ + xmlSchemaValidCtxtPtr vctxt; + + vctxt = xmlSchemaNewValidCtxt(NULL); + if (vctxt == NULL) + break; + xmlSchemaValidateSimpleValue(vctxt, typeDecl, + facet->value); + facet->val = vctxt->value; + vctxt->value = NULL; + if (facet->val == NULL) { + /* error code */ + xmlSchemaErrorContext(ctxt, NULL, + facet->node, NULL); + ctxt->error(ctxt->userData, + "Schemas: type %s facet value %s invalid\n", + name, facet->value); + } + xmlSchemaFreeValidCtxt(vctxt); + break; + } + case XML_SCHEMA_FACET_ENUMERATION: { + /* + * Okay we need to validate the value + * at that point. 
+ */ + xmlSchemaValidCtxtPtr vctxt; + int ret; + + vctxt = xmlSchemaNewValidCtxt(NULL); + if (vctxt == NULL) + break; + ret = xmlSchemaValidateSimpleValue(vctxt, typeDecl, + facet->value); + if (ret != 0) { + xmlSchemaErrorContext(ctxt, NULL, + facet->node, NULL); + ctxt->error(ctxt->userData, + "Schemas: type %s enumeration value %s invalid\n", + name, facet->value); + } + xmlSchemaFreeValidCtxt(vctxt); + break; + } + case XML_SCHEMA_FACET_PATTERN: + facet->regexp = xmlRegexpCompile(facet->value); + if (facet->regexp == NULL) { + /* error code */ + ctxt->error(ctxt->userData, + "Schemas: type %s facet regexp %s invalid\n", + name, facet->value); + } + break; + case XML_SCHEMA_FACET_TOTALDIGITS: + case XML_SCHEMA_FACET_FRACTIONDIGITS: + case XML_SCHEMA_FACET_LENGTH: + case XML_SCHEMA_FACET_MAXLENGTH: + case XML_SCHEMA_FACET_MINLENGTH: { + int ret; + + ret = xmlSchemaValidatePredefinedType( + nonNegativeIntegerType, facet->value, + &facet->val); + if (ret != 0) { + /* error code */ + xmlSchemaErrorContext(ctxt, NULL, + facet->node, NULL); + ctxt->error(ctxt->userData, + "Schemas: type %s facet value %s invalid\n", + name, facet->value); + } + break; + } + case XML_SCHEMA_FACET_WHITESPACE: { + if (xmlStrEqual(facet->value, BAD_CAST"preserve")) { + facet->whitespace = XML_SCHEMAS_FACET_PRESERVE; + } else if (xmlStrEqual(facet->value, + BAD_CAST"replace")) { + facet->whitespace = XML_SCHEMAS_FACET_REPLACE; + } else if (xmlStrEqual(facet->value, + BAD_CAST"collapse")) { + facet->whitespace = XML_SCHEMAS_FACET_COLLAPSE; + } else { + xmlSchemaErrorContext(ctxt, NULL, + facet->node, NULL); + ctxt->error(ctxt->userData, + "Schemas: type %s whiteSpace value %s invalid\n", + name, facet->value); + } + } + default: + break; + } + facet = facet->next; + } + } + } +} + +/** + * xmlSchemaAttrFixup: + * @attrDecl: the schema attribute definition + * @ctxt: the schema parser context + * @name: the attribute name + * + * Fixes finish doing the computations on the attributes 
definitions
+ * The type of the attribute is stored in attrDecl->subtypes: it is
+ * either looked up from the type name or copied from the attribute
+ * declaration it references. Already typed attributes are left alone.
+ */
+static void
+xmlSchemaAttrFixup(xmlSchemaAttributePtr attrDecl,
+                   xmlSchemaParserCtxtPtr ctxt,
+                   const xmlChar *name)
+{
+    xmlSchemaAttributePtr target;
+
+    if (name == NULL)
+        name = attrDecl->name;
+
+    /* already resolved */
+    if (attrDecl->subtypes != NULL)
+        return;
+
+    if (attrDecl->typeName != NULL) {
+        /* named type: a failed lookup is reported and stored as NULL */
+        xmlSchemaTypePtr found = xmlSchemaGetType(ctxt->schema,
+                                                  attrDecl->typeName,
+                                                  attrDecl->typeNs);
+
+        if ((found == NULL) && (ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "Schemas: attribute %s type %s not found\n",
+                        name, attrDecl->typeName);
+        attrDecl->subtypes = found;
+        return;
+    }
+
+    if (attrDecl->ref == NULL) {
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "Schemas: attribute %s has no type nor reference\n",
+                        name);
+        return;
+    }
+
+    /* reference: fix the target first, then share its type */
+    target = xmlHashLookup2(ctxt->schema->attrDecl, attrDecl->ref,
+                            attrDecl->refNs);
+    if (target == NULL) {
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "Schemas: attribute %s reference %s not found\n",
+                        name, attrDecl->ref);
+        return;
+    }
+    xmlSchemaAttrFixup(target, ctxt, NULL);
+    attrDecl->subtypes = target->subtypes;
+}
+
+/**
+ * xmlSchemaParse:
+ * @ctxt:  a schema parser context, the location of the schema is
+ *         taken from its URL field
+ *
+ * Load, XML parse a schema definition resource and build an internal
+ * XML Schema structure which can be used to validate instances.
+ * *WARNING* this interface is highly subject to change
+ *
+ * On success the parsed document is attached to the result (ret->doc).
+ * On failure the document is freed before returning.
+ *
+ * Returns the internal XML Schema structure built from the resource or
+ *         NULL in case of error
+ */
+xmlSchemaPtr
+xmlSchemaParse(xmlSchemaParserCtxtPtr ctxt)
+{
+    xmlSchemaPtr ret = NULL;
+    xmlDocPtr doc;
+    xmlNodePtr root, cur, delete;
+
+    xmlSchemaInitTypes();
+
+    if ((ctxt == NULL) || (ctxt->URL == NULL))
+        return (NULL);
+
+    ctxt->counter = 0;
+    ctxt->container = NULL;
+
+    /*
+     * First step is to parse the input document into an DOM/Infoset
+     */
+    doc = xmlParseFile((const char *) ctxt->URL);
+    if (doc == NULL) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "xmlSchemaParse: could not load %s\n", ctxt->URL);
+        return (NULL);
+    }
+
+    /*
+     * Then extract the root and Schema parse it
+     */
+    root = xmlDocGetRootElement(doc);
+    if (root == NULL) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "xmlSchemaParse: %s is empty\n",
+                        ctxt->URL);
+        /* nobody else holds the document, do not leak it */
+        xmlFreeDoc(doc);
+        return (NULL);
+    }
+
+    /*
+     * Remove all the blank text nodes
+     *
+     * The tree is walked in document order; a node to drop is only
+     * unlinked and freed at the next iteration, once cur moved past it.
+     */
+    delete = NULL;
+    cur = root;
+    while (cur != NULL) {
+        if (delete != NULL) {
+            xmlUnlinkNode(delete);
+            xmlFreeNode(delete);
+            delete = NULL;
+        }
+        if (cur->type == XML_TEXT_NODE) {
+            if (IS_BLANK_NODE(cur)) {
+                if (xmlNodeGetSpacePreserve(cur) != 1) {
+                    delete = cur;
+                }
+            }
+        } else if ((cur->type != XML_ELEMENT_NODE) &&
+                   (cur->type != XML_CDATA_SECTION_NODE)) {
+            delete = cur;
+            goto skip_children;
+        }
+
+        /*
+         * Skip to next node
+         */
+        if (cur->children != NULL) {
+            if ((cur->children->type != XML_ENTITY_DECL) &&
+                (cur->children->type != XML_ENTITY_REF_NODE) &&
+                (cur->children->type != XML_ENTITY_NODE)) {
+                cur = cur->children;
+                continue;
+            }
+        }
+skip_children:
+        if (cur->next != NULL) {
+            cur = cur->next;
+            continue;
+        }
+
+        /* no sibling left: climb until one is found or root is reached */
+        do {
+            cur = cur->parent;
+            if (cur == NULL)
+                break;
+            if (cur == root) {
+                cur = NULL;
+                break;
+            }
+            if (cur->next != NULL) {
+                cur = cur->next;
+                break;
+            }
+        } while (cur != NULL);
+    }
+    if (delete != NULL) {
+        xmlUnlinkNode(delete);
+        xmlFreeNode(delete);
+        delete = NULL;
+    }
+
+    /*
+     * Then do the parsing for good
+     */
+    ret = xmlSchemaParseSchema(ctxt, root);
+    if (ret == NULL) {
+        /* parsing failed: there is no schema to attach the document to */
+        xmlFreeDoc(doc);
+        return (NULL);
+    }
+    ret->doc = doc;
+
+    /*
+     * Then fix all the references.
+     */
+    ctxt->schema = ret;
+    xmlHashScanFull(ret->elemDecl,
+                    (xmlHashScannerFull) xmlSchemaRefFixupCallback, ctxt);
+
+    /*
+     * Then fixup all types properties
+     */
+    xmlHashScan(ret->typeDecl, (xmlHashScanner) xmlSchemaTypeFixup, ctxt);
+
+    /*
+     * Then build the content model for all elements
+     */
+    xmlHashScan(ret->elemDecl,
+                (xmlHashScanner) xmlSchemaBuildContentModel, ctxt);
+
+    /*
+     * Then check the defaults part of the type like facets values
+     */
+    xmlHashScan(ret->typeDecl, (xmlHashScanner) xmlSchemaCheckDefaults, ctxt);
+
+    /*
+     * Then fixup all attributes declarations
+     */
+    xmlHashScan(ret->attrDecl, (xmlHashScanner) xmlSchemaAttrFixup, ctxt);
+
+    return (ret);
+}
+
+/**
+ * xmlSchemaParse:
+ * @ctxt:  a schema validation context
+ * @URL:  the location of the schema
+ *
+ * Load, XML parse a schema definition resource and build an internal
+ * XML Shema struture which can be used to validate instances.
+ * *WARNING* this interface is highly subject to change + * + * Returns the internal XML Schema structure built from the resource or + * NULL in case of error + */ +void +xmlSchemaSetParserErrors(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, void *ctx) { + if (ctxt == NULL) + return; + ctxt->error = err; + ctxt->warning = warn; + ctxt->userData = ctx; +} + +/************************************************************************ + * * + * Simple type validation * + * * + ************************************************************************/ + +/** + * xmlSchemaValidateSimpleValue: + * @ctxt: a schema validation context + * @type: the type declaration + * @value: the value to validate + * + * Validate a value against a simple type + * + * Returns 0 if the value is valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateSimpleValue(xmlSchemaValidCtxtPtr ctxt, + xmlSchemaTypePtr type, + xmlChar *value) { + int ret = 0; + /* + * First normalize the value accordingly to Schema Datatype + * 4.3.6 whiteSpace definition of the whiteSpace facet of type + */ + /* + * Then check the normalized value against the lexical space of the + * type. 
+ */ + if (type->type == XML_SCHEMA_TYPE_BASIC) { + if (ctxt->value != NULL) { + xmlSchemaFreeValue(ctxt->value); + ctxt->value = NULL; + } + ret = xmlSchemaValidatePredefinedType(type, value, &(ctxt->value)); + } else if (type->type == XML_SCHEMA_TYPE_RESTRICTION) { + xmlSchemaTypePtr base; + xmlSchemaFacetPtr facet; + int tmp; + + base = type->baseType; + if (base != NULL) { + ret = xmlSchemaValidateSimpleValue(ctxt, base, value); + } else if (type->subtypes != NULL) { + + } + /* + * Do not validate facets when working on building the Schemas + */ + if (ctxt->schema != NULL) { + if (ret == 0) { + facet = type->facets; + while (facet != NULL) { + tmp = xmlSchemaValidateFacet(base, facet, value, + ctxt->value); + if (tmp != 0) + ret = tmp; + facet = facet->next; + } + } + } + } else if (type->type == XML_SCHEMA_TYPE_SIMPLE) { + xmlSchemaTypePtr base; + + base = type->subtypes; + if (base != NULL) { + ret = xmlSchemaValidateSimpleValue(ctxt, base, value); + } else { + TODO + } + } else if (type->type == XML_SCHEMA_TYPE_LIST) { + xmlSchemaTypePtr base; + xmlChar *cur, *end, tmp; + int ret2; + + base = type->subtypes; + if (base == NULL) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) { + xmlSchemaErrorContext(NULL, ctxt->schema, type->node, NULL); + ctxt->error(ctxt->userData, + "Internal: List type %s has no base type\n", + type->name); + } + return(-1); + } + cur = value; + do { + while (IS_BLANK(*cur)) cur++; + end = cur; + while ((*end != 0) && (!(IS_BLANK(*end)))) end++; + if (end == cur) + break; + tmp = *end; + *end = 0; + ret2 = xmlSchemaValidateSimpleValue(ctxt, base, cur); + if (ret2 != 0) + ret = 1; + *end = tmp; + cur = end; + } while (*cur != 0); + } else { + TODO + } + return(ret); +} + +/************************************************************************ + * * + * DOM Validation code * + * * + ************************************************************************/ + +static int xmlSchemaValidateContent(xmlSchemaValidCtxtPtr 
ctxt, + xmlNodePtr node); +static int xmlSchemaValidateAttributes(xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr elem, xmlSchemaAttributePtr attributes); +static int xmlSchemaValidateType(xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr elem, xmlSchemaElementPtr elemDecl, xmlSchemaTypePtr type); + +/** + * xmlSchemaRegisterAttributes: + * @ctxt: a schema validation context + * @attrs: a list of attributes + * + * Register the list of attributes as the set to be validated on that element + * + * Returns -1 in case of error, 0 otherwise + */ +static int +xmlSchemaRegisterAttributes(xmlSchemaValidCtxtPtr ctxt, + xmlAttrPtr attrs) { + while (attrs != NULL) { + if (ctxt->attrNr >= ctxt->attrMax) { + xmlSchemaAttrStatePtr tmp; + + ctxt->attrMax *= 2; + tmp = (xmlSchemaAttrStatePtr) + xmlRealloc(ctxt->attr, ctxt->attrMax * + sizeof(xmlSchemaAttrState)); + if (tmp == NULL) { + ctxt->attrMax /= 2; + return(-1); + } + ctxt->attr = tmp; + } + ctxt->attr[ctxt->attrNr].attr = attrs; + ctxt->attr[ctxt->attrNr].state = XML_SCHEMAS_ATTR_UNKNOWN; + ctxt->attrNr++; + attrs = attrs->next; + } + return(0); +} + +/** + * xmlSchemaCheckAttributes: + * @ctxt: a schema validation context + * @node: the node carrying it. + * + * Check that the registered set of attributes on the current node + * has been properly validated. + * + * Returns 0 if validity constraints are met, 1 otherwise. 
+ */ +static int +xmlSchemaCheckAttributes(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + int ret = 0; + int i; + + for (i = ctxt->attrBase;i < ctxt->attrNr;i++) { + if (ctxt->attr[i].attr == NULL) + break; + if (ctxt->attr[i].state == XML_SCHEMAS_ATTR_UNKNOWN) { + ret = 1; + ctxt->err = XML_SCHEMAS_ERR_ATTRUNKNOWN; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Attribute %s on %s is unknown\n", + ctxt->attr[i].attr->name, + node->name); + } + } + return(ret); +} + +/** + * xmlSchemaValidateSimpleContent: + * @ctxt: a schema validation context + * @elem: an element + * @type: the type declaration + * + * Validate the content of an element expected to be a simple type + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateSimpleContent(xmlSchemaValidCtxtPtr ctxt, + ATTRIBUTE_UNUSED xmlNodePtr node) { + xmlNodePtr child; + xmlSchemaTypePtr type, base; + xmlChar *value; + int ret = 0, tmp; + + child = ctxt->node; + type = ctxt->type; + + /* + * Validation Rule: Element Locally Valid (Type): 3.1.3 + */ + value = xmlNodeGetContent(child); + /* xmlSchemaValidateSimpleValue(ctxt, type, value); */ + switch (type->type) { + case XML_SCHEMA_TYPE_RESTRICTION: { + xmlSchemaFacetPtr facet; + + base = type->baseType; + if (base != NULL) { + ret = xmlSchemaValidateSimpleValue(ctxt, base, value); + } else { + TODO + } + if (ret == 0) { + facet = type->facets; + while (facet != NULL) { + tmp = xmlSchemaValidateFacet(base, facet, value, + ctxt->value); + if (tmp != 0) + ret = tmp; + facet = facet->next; + } + } + break; + } + default: + TODO + } + if (value != NULL) + xmlFree(value); + + return(ret); +} + +/** + * xmlSchemaValidateCheckNodeList + * @nodelist: the list of nodes + * + * Check the node list is only made of text nodes and entities pointing + * to text nodes + * + * Returns 1 if true, 0 if false and -1 in case of error + */ +static int 
+xmlSchemaValidateCheckNodeList(xmlNodePtr nodelist) { + while (nodelist != NULL) { + if (nodelist->type == XML_ENTITY_REF_NODE) { + TODO /* implement recursion in the entity content */ + } + if ((nodelist->type != XML_TEXT_NODE) && + (nodelist->type != XML_COMMENT_NODE) && + (nodelist->type != XML_PI_NODE) && + (nodelist->type != XML_PI_NODE)) { + return(0); + } + nodelist = nodelist->next; + } + return(1); +} + +/** + * xmlSchemaSkipIgnored: + * @ctxt: a schema validation context + * @type: the current type context + * @node: the top node. + * + * Skip ignorable nodes in that context + * + * Returns the new sibling + * number otherwise and -1 in case of internal or API error. + */ +static xmlNodePtr +xmlSchemaSkipIgnored(ATTRIBUTE_UNUSED xmlSchemaValidCtxtPtr ctxt, + xmlSchemaTypePtr type, + xmlNodePtr node) { + int mixed = 0; + /* + * TODO complete and handle entities + */ + mixed = ((type->contentType == XML_SCHEMA_CONTENT_MIXED) || + (type->contentType == XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS)); + while ((node != NULL) && + ((node->type == XML_COMMENT_NODE) || + ((mixed == 1) && (node->type == XML_TEXT_NODE)) || + (((type->contentType == XML_SCHEMA_CONTENT_ELEMENTS) && + (node->type == XML_TEXT_NODE) && + (IS_BLANK_NODE(node)))))) { + node = node->next; + } + return(node); +} + +/** + * xmlSchemaValidateCallback: + * @ctxt: a schema validation context + * @name: the name of the element detected (might be NULL) + * @type: the type + * + * A transition has been made in the automata associated to an element + * content model + */ +static void +xmlSchemaValidateCallback(xmlSchemaValidCtxtPtr ctxt, + ATTRIBUTE_UNUSED const xmlChar *name, + xmlSchemaTypePtr type, + xmlNodePtr node) { + xmlSchemaTypePtr oldtype = ctxt->type; + xmlNodePtr oldnode = ctxt->node; +#ifdef DEBUG_CONTENT + printf("xmlSchemaValidateCallback: %s, %s, %s\n", + name, type->name, node->name); +#endif + ctxt->type = type; + ctxt->node = node; + xmlSchemaValidateContent(ctxt, node); + ctxt->type = 
oldtype; + ctxt->node = oldnode; +} + + +#if 0 +/** + * xmlSchemaValidateSimpleRestrictionType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of a restriction type. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateSimpleRestrictionType(xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr node) +{ + xmlNodePtr child; + xmlSchemaTypePtr type; + int ret; + + child = ctxt->node; + type = ctxt->type; + + if ((ctxt == NULL) || (type == NULL)) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateSimpleRestrictionType %s\n", + node->name); + return (-1); + } + /* + * Only text and text based entities references shall be found there + */ + ret = xmlSchemaValidateCheckNodeList(child); + if (ret < 0) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateSimpleType %s content\n", + node->name); + return (-1); + } else if (ret == 0) { + ctxt->err = XML_SCHEMAS_ERR_NOTSIMPLE; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s content is not a simple type\n", + node->name); + return (-1); + } + ctxt->type = type->subtypes; + xmlSchemaValidateContent(ctxt, node); + ctxt->type = type; + return (ret); +} +#endif + +/** + * xmlSchemaValidateSimpleType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of an simple type. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */
+static int
+xmlSchemaValidateSimpleType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
+    xmlNodePtr child;
+    xmlSchemaTypePtr type;
+    xmlAttrPtr attr;
+    int ret;
+
+    /* the context has to be checked before anything is read from it */
+    if (ctxt == NULL)
+        return(-1);
+
+    child = ctxt->node;
+    type = ctxt->type;
+
+    if (type == NULL) {
+        ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                    "Internal error: xmlSchemaValidateSimpleType %s\n",
+                        node->name);
+        return(-1);
+    }
+    /*
+     * Only text and text based entities references shall be found there
+     */
+    ret = xmlSchemaValidateCheckNodeList(child);
+    if (ret < 0) {
+        ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                "Internal error: xmlSchemaValidateSimpleType %s content\n",
+                        node->name);
+        return(-1);
+    } else if (ret == 0) {
+        ctxt->err = XML_SCHEMAS_ERR_NOTSIMPLE;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Element %s content is not a simple type\n",
+                        node->name);
+        return(-1);
+    }
+    /*
+     * Validation Rule: Element Locally Valid (Type): 3.1.1
+     *
+     * The only attributes allowed are the four from the XML Schema
+     * instance namespace: type, nil, schemaLocation and
+     * noNamespaceSchemaLocation.
+     */
+    for (attr = node->properties; attr != NULL; attr = attr->next) {
+        if ((attr->ns == NULL) ||
+            (!xmlStrEqual(attr->ns->href, xmlSchemaInstanceNs)) ||
+            ((!xmlStrEqual(attr->name, BAD_CAST"type")) &&
+             (!xmlStrEqual(attr->name, BAD_CAST"nil")) &&
+             (!xmlStrEqual(attr->name, BAD_CAST"schemaLocation")) &&
+             (!xmlStrEqual(attr->name, BAD_CAST"noNamespaceSchemaLocation")))) {
+            ctxt->err = XML_SCHEMAS_ERR_INVALIDATTR;
+            /*
+             * The attribute is carried by @node; child is its content
+             * and is NULL for an empty element.
+             */
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData,
+                    "Element %s: attribute %s should not be present\n",
+                            node->name, attr->name);
+            return(ctxt->err);
+        }
+    }
+
+    /* validate the content against the subtype, then restore the type */
+    ctxt->type = type->subtypes;
+    ret = xmlSchemaValidateSimpleContent(ctxt, node);
+    ctxt->type = type;
+    return(ret);
+}
+
+/**
+ * xmlSchemaValidateElementType:
+ * @ctxt:  a schema validation context
+ * @node:  the top node.
+ *
+ * Validate the content of an element type.
+ * Validation Rule: Element Locally Valid (Complex Type) + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateElementType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + xmlNodePtr child; + xmlSchemaTypePtr type; + xmlRegExecCtxtPtr oldregexp; /* cont model of the parent */ + xmlSchemaElementPtr decl; + int ret, attrBase; + + oldregexp = ctxt->regexp; + + child = ctxt->node; + type = ctxt->type; + + if ((ctxt == NULL) || (type == NULL)) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateElementType\n", + node->name); + return(-1); + } + if (child == NULL) { + if (type->minOccurs > 0) { + ctxt->err = XML_SCHEMAS_ERR_MISSING; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: missing child %s\n", + node->name, type->name); + } + return(ctxt->err); + } + + /* + * Verify the element matches + */ + if (!xmlStrEqual(child->name, type->name)) { + ctxt->err = XML_SCHEMAS_ERR_WRONGELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: missing child %s found %s\n", + node->name, type->name, child->name); + return(ctxt->err); + } + /* + * Verify the attributes + */ + attrBase = ctxt->attrBase; + ctxt->attrBase = ctxt->attrNr; + xmlSchemaRegisterAttributes(ctxt, child->properties); + xmlSchemaValidateAttributes(ctxt, child, type->attributes); + /* + * Verify the element content recursively + */ + decl = (xmlSchemaElementPtr) type; + oldregexp = ctxt->regexp; + if (decl->contModel != NULL) { + ctxt->regexp = xmlRegNewExecCtxt(decl->contModel, + (xmlRegExecCallbacks) xmlSchemaValidateCallback, + ctxt); +#ifdef DEBUG_AUTOMATA + xmlGenericError(xmlGenericErrorContext, + "====> %s\n", node->name); +#endif + } + xmlSchemaValidateType(ctxt, child, (xmlSchemaElementPtr)type, + type->subtypes); + + if (decl->contModel != NULL) { + ret = 
xmlRegExecPushString(ctxt->regexp, NULL, NULL); +#ifdef DEBUG_AUTOMATA + xmlGenericError(xmlGenericErrorContext, + "====> %s : %d\n", node->name, ret); +#endif + if (ret == 0) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s content check failed\n", + node->name); + } else if (ret < 0) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s content check failure\n", + node->name); +#ifdef DEBUG_CONTENT + } else { + xmlGenericError(xmlGenericErrorContext, + "Element %s content check succeeded\n", node->name); + +#endif + } + xmlRegFreeExecCtxt(ctxt->regexp); + } + /* + * Verify that all attributes were Schemas-validated + */ + xmlSchemaCheckAttributes(ctxt, node); + ctxt->attrNr = ctxt->attrBase; + ctxt->attrBase = attrBase; + + ctxt->regexp = oldregexp; + + ctxt->node = child; + ctxt->type = type; + return(ctxt->err); +} + +/** + * xmlSchemaValidateBasicType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of an element expected to be a basic type type + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateBasicType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + int ret; + xmlNodePtr child, cur; + xmlSchemaTypePtr type; + xmlChar *value; /* lexical representation */ + + child = ctxt->node; + type = ctxt->type; + + if ((ctxt == NULL) || (type == NULL)) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: xmlSchemaValidateBasicType\n", + node->name); + return(-1); + } + /* + * First check the content model of the node. 
+ */ + cur = child; + while (cur != NULL) { + switch (cur->type) { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + break; + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + TODO + break; + case XML_ELEMENT_NODE: + ctxt->err = XML_SCHEMAS_ERR_INVALIDELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: child %s should not be present\n", + node->name, cur->name); + return(ctxt->err); + case XML_ATTRIBUTE_NODE: + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_HTML_DOCUMENT_NODE: + case XML_DTD_NODE: + case XML_ELEMENT_DECL: + case XML_ATTRIBUTE_DECL: + case XML_ENTITY_DECL: + case XML_NAMESPACE_DECL: +#ifdef LIBXML_DOCB_ENABLED + case XML_DOCB_DOCUMENT_NODE: +#endif + ctxt->err = XML_SCHEMAS_ERR_INVALIDELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s: node type %d unexpected here\n", + node->name, cur->type); + return(ctxt->err); + } + cur = cur->next; + } + if (child == NULL) + value = NULL; + else + value = xmlNodeGetContent(child->parent); + + if (ctxt->value != NULL) { + xmlSchemaFreeValue(ctxt->value); + ctxt->value = NULL; + } + ret = xmlSchemaValidatePredefinedType(type, value, &(ctxt->value)); + if (value != NULL) + xmlFree(value); + if (ret != 0) { + ctxt->error(ctxt->userData, + "Element %s: failed to validate basic type %s\n", + node->name, type->name); + } + return(ret); +} + +/** + * xmlSchemaValidateComplexType: + * @ctxt: a schema validation context + * @node: the top node. + * + * Validate the content of an element expected to be a complex type type + * xmlschema-1.html#cvc-complex-type + * Validation Rule: Element Locally Valid (Complex Type) + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateComplexType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + xmlNodePtr child; + xmlSchemaTypePtr type, subtype, model; + int ret; + + child = ctxt->node; + type = ctxt->type; + + /* 3.4.4 1 was verified on the caller */ + + switch (type->contentType) { + case XML_SCHEMA_CONTENT_EMPTY: + if (child != NULL) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s is supposed to be empty\n", + node->name); + } + break; + case XML_SCHEMA_CONTENT_ELEMENTS: + case XML_SCHEMA_CONTENT_MIXED: + case XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS: + /* + * Skip ignorable nodes in that context + */ + child = xmlSchemaSkipIgnored(ctxt, type, child); + subtype = type->subtypes; + ctxt->type = model; + while (child != NULL) { + if (child->type == XML_ELEMENT_NODE) { + ret = xmlRegExecPushString(ctxt->regexp, + child->name, child); +#ifdef DEBUG_AUTOMATA + if (ret < 0) + xmlGenericError(xmlGenericErrorContext, + " --> %s Error\n", child->name); + else + xmlGenericError(xmlGenericErrorContext, + " --> %s\n", child->name); +#endif + } + child = child->next; + /* + * Skip ignorable nodes in that context + */ + child = xmlSchemaSkipIgnored(ctxt, type, child); + } + break; + default: + TODO + xmlGenericError(xmlGenericErrorContext, + "unimplemented content type %d\n", + type->contentType); + } + return(ctxt->err); +} + +/** + * xmlSchemaValidateContent: + * @ctxt: a schema validation context + * @elem: an element + * @type: the type declaration + * + * Validate the content of an element against the type. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateContent(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) { + xmlNodePtr child; + xmlSchemaTypePtr type; + + child = ctxt->node; + type = ctxt->type; + + switch (type->type) { + case XML_SCHEMA_TYPE_ANY: + /* Any type will do it, fine */ + TODO /* handle recursivity */ + break; + case XML_SCHEMA_TYPE_COMPLEX: + xmlSchemaValidateComplexType(ctxt, node); + break; + case XML_SCHEMA_TYPE_ELEMENT: { + xmlSchemaElementPtr decl = (xmlSchemaElementPtr) type; + /* + * Handle element reference here + */ + if (decl->ref != NULL) { + if (decl->refDecl == NULL) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: element reference %s not resolved\n", + decl->ref); + return(-1); + } + ctxt->type = (xmlSchemaTypePtr) decl->refDecl; + decl = decl->refDecl; + } + xmlSchemaValidateElementType(ctxt, node); + ctxt->type = type; + break; + } + case XML_SCHEMA_TYPE_BASIC: + xmlSchemaValidateBasicType(ctxt, node); + break; + case XML_SCHEMA_TYPE_FACET: + TODO + break; + case XML_SCHEMA_TYPE_SIMPLE: + xmlSchemaValidateSimpleType(ctxt, node); + break; + case XML_SCHEMA_TYPE_SEQUENCE: + TODO + break; + case XML_SCHEMA_TYPE_CHOICE: + TODO + break; + case XML_SCHEMA_TYPE_ALL: + TODO + break; + case XML_SCHEMA_TYPE_SIMPLE_CONTENT: + TODO + break; + case XML_SCHEMA_TYPE_COMPLEX_CONTENT: + TODO + break; + case XML_SCHEMA_TYPE_UR: + TODO + break; + case XML_SCHEMA_TYPE_RESTRICTION: + /*xmlSchemaValidateRestrictionType(ctxt, node); */ + TODO + break; + case XML_SCHEMA_TYPE_EXTENSION: + TODO + break; + case XML_SCHEMA_TYPE_ATTRIBUTE: + TODO + break; + case XML_SCHEMA_TYPE_GROUP: + TODO + break; + case XML_SCHEMA_TYPE_NOTATION: + TODO + break; + case XML_SCHEMA_TYPE_LIST: + TODO + break; + case XML_SCHEMA_TYPE_UNION: + TODO + break; + case XML_SCHEMA_FACET_MININCLUSIVE: + TODO + break; + case XML_SCHEMA_FACET_MINEXCLUSIVE: + TODO + break; + case XML_SCHEMA_FACET_MAXINCLUSIVE: + TODO + break; + case 
XML_SCHEMA_FACET_MAXEXCLUSIVE: + TODO + break; + case XML_SCHEMA_FACET_TOTALDIGITS: + TODO + break; + case XML_SCHEMA_FACET_FRACTIONDIGITS: + TODO + break; + case XML_SCHEMA_FACET_PATTERN: + TODO + break; + case XML_SCHEMA_FACET_ENUMERATION: + TODO + break; + case XML_SCHEMA_FACET_WHITESPACE: + TODO + break; + case XML_SCHEMA_FACET_LENGTH: + TODO + break; + case XML_SCHEMA_FACET_MAXLENGTH: + TODO + break; + case XML_SCHEMA_FACET_MINLENGTH: + TODO + break; + } + xmlSchemaValidateAttributes(ctxt, node, type->attributes); + + if (ctxt->node == NULL) + return(ctxt->err); + ctxt->node = ctxt->node->next; + ctxt->type = type->next; + return(ctxt->err); +} + +/** + * xmlSchemaValidateType: + * @ctxt: a schema validation context + * @elem: an element + * @type: the list of type declarations + * + * Validate the content of an element against the types. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem, + xmlSchemaElementPtr elemDecl, + xmlSchemaTypePtr type) { + xmlChar *nil; + + if ((elem->content == NULL) || (type == NULL) || (elemDecl == NULL)) + return(0); + /* + * 3.3.4 : 2 + */ + if (elemDecl->flags & XML_SCHEMAS_ELEM_ABSTRACT) { + ctxt->err = XML_SCHEMAS_ERR_ISABSTRACT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s is abstract\n", elem->name); + return(ctxt->err); + } + /* + * 3.3.4: 3 + */ + nil = xmlGetNsProp(elem, BAD_CAST "nil", xmlSchemaInstanceNs); + if (elemDecl->flags & XML_SCHEMAS_ELEM_NILLABLE) { + /* 3.3.4: 3.2 */ + if (xmlStrEqual(nil, BAD_CAST "true")) { + if (elem->children != NULL) { + ctxt->err = XML_SCHEMAS_ERR_NOTEMPTY; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s is not empty\n", + elem->name); + return(ctxt->err); + } + if ((elemDecl->flags & XML_SCHEMAS_ELEM_FIXED) && + (elemDecl->value != NULL)) { + ctxt->err = 
XML_SCHEMAS_ERR_HAVEDEFAULT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Empty element %s cannot get a fixed value\n", + elem->name); + return(ctxt->err); + } + } + } else { + /* 3.3.4: 3.1 */ + if (nil != NULL) { + ctxt->err = XML_SCHEMAS_ERR_NOTNILLABLE; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Element %s with xs:nil but not nillable\n", + elem->name); + xmlFree(nil); + return(ctxt->err); + } + } + + /* TODO 3.3.4: 4 if the element carries xs:type*/ + + ctxt->type = elemDecl->subtypes; + ctxt->node = elem->children; + xmlSchemaValidateContent(ctxt, elem); + xmlSchemaValidateAttributes(ctxt, elem, elemDecl->attributes); + + return(ctxt->err); +} + + +/** + * xmlSchemaValidateAttributes: + * @ctxt: a schema validation context + * @elem: an element + * @attributes: the list of attribute declarations + * + * Validate the attributes of an element. + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateAttributes(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem, + xmlSchemaAttributePtr attributes) { + int i, ret; + xmlAttrPtr attr; + xmlChar *value; + + if (attributes == NULL) + return(0); + while (attributes != NULL) { + for (i = ctxt->attrBase;i < ctxt->attrNr;i++) { + attr = ctxt->attr[i].attr; + if (attr == NULL) + continue; + if (!xmlStrEqual(attr->name, attributes->name)) + continue; + /* + * TODO: handle the mess about namespaces here. 
+ */ + if ((attr->ns != NULL) /* || (attributes->ns != NULL) */) { + TODO + } + if (attributes->subtypes == NULL) { + ctxt->err = XML_SCHEMAS_ERR_INTERNAL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "Internal error: attribute %s type not resolved\n", + attr->name); + continue; + } + value = xmlNodeListGetString(elem->doc, attr->children, 1); + ret = xmlSchemaValidateSimpleValue(ctxt, attributes->subtypes, + value); + if (ret != 0) { + ctxt->err = XML_SCHEMAS_ERR_ATTRINVALID; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, + "attribute %s on %s does not match type\n", + attr->name, elem->name); + } else { + ctxt->attr[i].state = XML_SCHEMAS_ATTR_CHECKED; + } + if (value != NULL) { + xmlFree(value); + } + } + attributes = attributes->next; + } + return(ctxt->err); +} + +/** + * xmlSchemaValidateElement: + * @ctxt: a schema validation context + * @elem: an element + * + * Validate an element in a tree + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +static int +xmlSchemaValidateElement(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem) { + xmlSchemaElementPtr elemDecl; + int ret, attrBase; + + if (elem->ns != NULL) + elemDecl = xmlHashLookup3(ctxt->schema->elemDecl, + elem->name, elem->ns->href, NULL); + else + elemDecl = xmlHashLookup3(ctxt->schema->elemDecl, + elem->name, NULL, NULL); + /* + * 3.3.4 : 1 + */ + if (elemDecl == NULL) { + ctxt->err = XML_SCHEMAS_ERR_UNDECLAREDELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s not declared\n", + elem->name); + return(ctxt->err); + } + if (elemDecl->subtypes == NULL) { + ctxt->err = XML_SCHEMAS_ERR_NOTYPE; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s has no type\n", + elem->name); + return(ctxt->err); + } + /* + * Verify the attributes + */ + attrBase = ctxt->attrBase; + ctxt->attrBase = ctxt->attrNr; + xmlSchemaRegisterAttributes(ctxt, elem->properties); + xmlSchemaValidateAttributes(ctxt, elem, elemDecl->attributes); + /* + * Verify the element content recursively + */ + if (elemDecl->contModel != NULL) { + ctxt->regexp = xmlRegNewExecCtxt(elemDecl->contModel, + (xmlRegExecCallbacks) xmlSchemaValidateCallback, + ctxt); +#ifdef DEBUG_AUTOMATA + xmlGenericError(xmlGenericErrorContext, + "====> %s\n", elem->name); +#endif + } + xmlSchemaValidateType(ctxt, elem, elemDecl, elemDecl->subtypes); + ret = xmlRegExecPushString(ctxt->regexp, NULL, NULL); +#ifdef DEBUG_AUTOMATA + xmlGenericError(xmlGenericErrorContext, + "====> %s : %d\n", elem->name, ret); +#endif + if (ret == 0) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s content check failed\n", + elem->name); + } else if (ret < 0) { + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s content check failed\n", + elem->name); +#ifdef DEBUG_CONTENT + } else { + xmlGenericError(xmlGenericErrorContext, + "Element %s content check succeeded\n", elem->name); + +#endif + } + if (elemDecl->contModel != NULL) { + 
xmlRegFreeExecCtxt(ctxt->regexp); + } + /* + * Verify that all attributes were Schemas-validated + */ + xmlSchemaCheckAttributes(ctxt, elem); + ctxt->attrNr = ctxt->attrBase; + ctxt->attrBase = attrBase; + + return(ctxt->err); +} + +/** + * xmlSchemaValidateDocument: + * @ctxt: a schema validation context + * @doc: a parsed document tree + * + * Validate a document tree in memory. + * + * Returns 0 if the document is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +static int +xmlSchemaValidateDocument(xmlSchemaValidCtxtPtr ctxt, xmlDocPtr doc) { + xmlNodePtr root; + xmlSchemaElementPtr elemDecl; + + root = xmlDocGetRootElement(doc); + if (root == NULL) { + ctxt->err = XML_SCHEMAS_ERR_NOROOT; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "document has no root\n"); + return(ctxt->err); + } + if (root->ns != NULL) + elemDecl = xmlHashLookup3(ctxt->schema->elemDecl, + root->name, root->ns->href, NULL); + else + elemDecl = xmlHashLookup3(ctxt->schema->elemDecl, + root->name, NULL, NULL); + if (elemDecl == NULL) { + ctxt->err = XML_SCHEMAS_ERR_UNDECLAREDELEM; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Element %s not declared\n", + root->name); + } + if ((elemDecl->flags & XML_SCHEMAS_ELEM_TOPLEVEL) == 0) { + ctxt->err = XML_SCHEMAS_ERR_NOTTOPLEVEL; + if (ctxt->error != NULL) + ctxt->error(ctxt->userData, "Root element %s not toplevel\n", + root->name); + } + /* + * Okay, start the recursive validation + */ + xmlSchemaValidateElement(ctxt, root); + + return(ctxt->err); +} + +/************************************************************************ + * * + * SAX Validation code * + * * + ************************************************************************/ + +/************************************************************************ + * * + * Validation interfaces * + * * + ************************************************************************/ + +/** + * xmlSchemaNewValidCtxt: + * 
@schema: a precompiled XML Schemas + * + * Create an XML Schemas validation context based on the given schema + * + * Returns the validation context or NULL in case of error + */ +xmlSchemaValidCtxtPtr +xmlSchemaNewValidCtxt(xmlSchemaPtr schema) { + xmlSchemaValidCtxtPtr ret; + + ret = (xmlSchemaValidCtxtPtr) xmlMalloc(sizeof(xmlSchemaValidCtxt)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Failed to allocate new schama validation context\n"); + return (NULL); + } + memset(ret, 0, sizeof(xmlSchemaValidCtxt)); + ret->schema = schema; + ret->attrNr = 0; + ret->attrMax = 10; + ret->attr = (xmlSchemaAttrStatePtr) xmlMalloc(ret->attrMax * + sizeof(xmlSchemaAttrState)); + if (ret->attr == NULL) { + free(ret); + return(NULL); + } + memset(ret->attr, 0, ret->attrMax * sizeof(xmlSchemaAttrState)); + return (ret); +} + +/** + * xmlSchemaFreeValidCtxt: + * @ctxt: the schema validation context + * + * Free the resources associated to the schema validation context + */ +void +xmlSchemaFreeValidCtxt(xmlSchemaValidCtxtPtr ctxt) { + if (ctxt == NULL) + return; + if (ctxt->attr != NULL) + xmlFree(ctxt->attr); + xmlFree(ctxt); +} + +/** + * xmlSchemaSetValidErrors: + * @ctxt: a schema validation context + * @err: the error function + * @warn: the warning function + * @ctxt: the functions context + * + * Set the error and warning callback informations + */ +void +xmlSchemaSetValidErrors(xmlSchemaValidCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, void *ctx) { + if (ctxt == NULL) + return; + ctxt->error = err; + ctxt->warning = warn; + ctxt->userData = ctx; +} + +/** + * xmlSchemaValidateDoc: + * @ctxt: a schema validation context + * @doc: a parsed document tree + * + * Validate a document tree in memory. + * + * Returns 0 if the document is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +int +xmlSchemaValidateDoc(xmlSchemaValidCtxtPtr ctxt, xmlDocPtr doc) { + int ret; + + if ((ctxt == NULL) || (doc == NULL)) + return(-1); + + ctxt->doc = doc; + ret = xmlSchemaValidateDocument(ctxt, doc); + return(ret); +} + +/** + * xmlSchemaValidateStream: + * @ctxt: a schema validation context + * @input: the input to use for reading the data + * @enc: an optional encoding information + * @sax: a SAX handler for the resulting events + * @user_data: the context to provide to the SAX handler. + * + * Validate a document tree in memory. + * + * Returns 0 if the document is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. + */ +int +xmlSchemaValidateStream(xmlSchemaValidCtxtPtr ctxt, + xmlParserInputBufferPtr input, xmlCharEncoding enc, + xmlSAXHandlerPtr sax, void *user_data) { + if ((ctxt == NULL) || (input == NULL)) + return(-1); + ctxt->input = input; + ctxt->enc = enc; + ctxt->sax = sax; + ctxt->user_data = user_data; + TODO + return(0); +} + +#endif /* LIBXML_SCHEMAS_ENABLED */ diff --git a/xmlschemastypes.c b/xmlschemastypes.c new file mode 100644 index 00000000..4c8d4202 --- /dev/null +++ b/xmlschemastypes.c @@ -0,0 +1,490 @@ +/* + * schemastypes.c : implementation of the XML Schema Datatypes + * definition and validity checking + * + * See Copyright for the status of this software. 
+ * + * Daniel Veillard + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define DEBUG + +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +#define XML_SCHEMAS_NAMESPACE_NAME \ + (const xmlChar *)"http://www.w3.org/2001/XMLSchema" + +typedef enum { + XML_SCHEMAS_UNKNOWN = 0, + XML_SCHEMAS_STRING, + XML_SCHEMAS_NMTOKEN, + XML_SCHEMAS_DECIMAL, + XML_SCHEMAS_, + XML_SCHEMAS_XXX +} xmlSchemaValType; + +unsigned long powten[10] = { + 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000L, + 100000000L, 1000000000L +}; + +typedef struct _xmlSchemaValDecimal xmlSchemaValDecimal; +typedef xmlSchemaValDecimal *xmlSchemaValDecimalPtr; +struct _xmlSchemaValDecimal { + /* would use long long but not portable */ + unsigned long base; + unsigned int extra; + int sign:1; + int frac:7; + int total:8; +}; + +struct _xmlSchemaVal { + xmlSchemaValType type; + union { + xmlSchemaValDecimal decimal; + } value; +}; + +static int xmlSchemaTypesInitialized = 0; +static xmlHashTablePtr xmlSchemaTypesBank = NULL; + +static xmlSchemaTypePtr xmlSchemaTypeStringDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeAnyTypeDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeAnySimpleTypeDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeDecimalDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeDateDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypePositiveIntegerDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeNonNegativeIntegerDef = NULL; +static xmlSchemaTypePtr xmlSchemaTypeNmtoken = NULL; + +/* + * xmlSchemaInitBasicType: + * @name: the type name + * + * Initialize one default type + */ +static xmlSchemaTypePtr +xmlSchemaInitBasicType(const char *name) { + xmlSchemaTypePtr ret; + + ret = (xmlSchemaTypePtr) xmlMalloc(sizeof(xmlSchemaType)); + if (ret == NULL) { + xmlGenericError(xmlGenericErrorContext, + 
"Could not initilize type %s: out of memory\n", name); + return(NULL); + } + memset(ret, 0, sizeof(xmlSchemaType)); + ret->name = xmlStrdup((const xmlChar *)name); + ret->type = XML_SCHEMA_TYPE_BASIC; + ret->contentType = XML_SCHEMA_CONTENT_BASIC; + xmlHashAddEntry2(xmlSchemaTypesBank, ret->name, + XML_SCHEMAS_NAMESPACE_NAME, ret); + return(ret); +} + +/* + * xmlSchemaInitTypes: + * + * Initialize the default XML Schemas type library + */ +void +xmlSchemaInitTypes(void) { + if (xmlSchemaTypesInitialized != 0) + return; + xmlSchemaTypesBank = xmlHashCreate(40); + + xmlSchemaTypeStringDef = xmlSchemaInitBasicType("string"); + xmlSchemaTypeAnyTypeDef = xmlSchemaInitBasicType("anyType"); + xmlSchemaTypeAnySimpleTypeDef = xmlSchemaInitBasicType("anySimpleType"); + xmlSchemaTypeDecimalDef = xmlSchemaInitBasicType("decimal"); + xmlSchemaTypeDateDef = xmlSchemaInitBasicType("date"); + xmlSchemaTypePositiveIntegerDef = xmlSchemaInitBasicType("positiveInteger"); + xmlSchemaTypeNonNegativeIntegerDef = + xmlSchemaInitBasicType("nonNegativeInteger"); + xmlSchemaTypeNmtoken = xmlSchemaInitBasicType("NMTOKEN"); + + xmlSchemaTypesInitialized = 1; +} + +/** + * xmlSchemaCleanupTypes: + * + * Cleanup the default XML Schemas type library + */ +void +xmlSchemaCleanupTypes(void) { + if (xmlSchemaTypesInitialized == 0) + return; + xmlHashFree(xmlSchemaTypesBank, (xmlHashDeallocator) xmlSchemaFreeType); + xmlSchemaTypesInitialized = 0; +} + +/** + * xmlSchemaNewValue: + * @type: the value type + * + * Allocate a new simple type value + * + * Returns a pointer to the new value or NULL in case of error + */ +static xmlSchemaValPtr +xmlSchemaNewValue(xmlSchemaValType type) { + xmlSchemaValPtr value; + + value = (xmlSchemaValPtr) xmlMalloc(sizeof(xmlSchemaVal)); + if (value == NULL) { + return(NULL); + } + memset(value, 0, sizeof(xmlSchemaVal)); + value->type = type; + return(value); +} + +/** + * xmlSchemaFreeValue: + * @value: the value to free + * + * Cleanup the default XML Schemas type 
library + */ +void +xmlSchemaFreeValue(xmlSchemaValPtr value) { + if (value == NULL) + return; + xmlFree(value); +} + +/** + * xmlSchemaGetPredefinedType: + * @name: the type name + * @ns: the URI of the namespace usually "http://www.w3.org/2001/XMLSchema" + * + * Lookup a type in the default XML Schemas type library + * + * Returns the type if found, NULL otherwise + */ +xmlSchemaTypePtr +xmlSchemaGetPredefinedType(const xmlChar *name, const xmlChar *ns) { + if (xmlSchemaTypesInitialized == 0) + xmlSchemaInitTypes(); + if (name == NULL) + return(NULL); + return((xmlSchemaTypePtr) xmlHashLookup2(xmlSchemaTypesBank, name, ns)); +} +/** + * xmlSchemaValidatePredefinedType: + * @type: the predefined type + * @value: the value to check + * @val: the return computed value + * + * Check that a value conforms to the lexical space of the predefined type. + * if true a value is computed and returned in @val. + * + * Returns 0 if this validates, a positive error code number otherwise + * and -1 in case of internal or API error. 
+ */ +int +xmlSchemaValidatePredefinedType(xmlSchemaTypePtr type, const xmlChar *value, + xmlSchemaValPtr *val) { + xmlSchemaValPtr v; + + if (xmlSchemaTypesInitialized == 0) + return(-1); + if (type == NULL) + return(-1); + if (val != NULL) + *val = NULL; + if (type == xmlSchemaTypeStringDef) { + return(0); + } else if (type == xmlSchemaTypeAnyTypeDef) { + return(0); + } else if (type == xmlSchemaTypeAnySimpleTypeDef) { + return(0); + } else if (type == xmlSchemaTypeNmtoken) { + if (xmlValidateNmtokenValue(value)) + return(0); + return(1); + } else if (type == xmlSchemaTypeDecimalDef) { + const xmlChar *cur = value, *tmp; + int frac = 0, main, neg = 0; + unsigned long base = 0; + if (cur == NULL) + return(1); + if (*cur == '+') + cur++; + else if (*cur == '-') { + neg = 1; + cur++; + } + tmp = cur; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + cur++; + } + main = cur - tmp; + if (*cur == '.') { + cur++; + tmp = cur; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + cur++; + } + frac = cur - tmp; + } + if (*cur != 0) + return(1); + /* a decimal needs at least one digit: "", "+", "-" and "." are invalid */ + if ((main == 0) && (frac == 0)) + return(1); + if (val != NULL) { + v = xmlSchemaNewValue(XML_SCHEMAS_DECIMAL); + if (v != NULL) { + v->value.decimal.base = base; + v->value.decimal.sign = neg; + v->value.decimal.frac = frac; + v->value.decimal.total = frac + main; + *val = v; + } + } + return(0); + } else if (type == xmlSchemaTypeDateDef) { + const xmlChar *cur = value; + if (cur == NULL) + return(1); + if (*cur == '-') + cur++; + /* the year needs at least four digits; each test returns before the next index is read */ + if ((cur[0] < '0') || (cur[0] > '9')) + return(1); + if ((cur[1] < '0') || (cur[1] > '9')) + return(1); + if ((cur[2] < '0') || (cur[2] > '9')) + return(1); + if ((cur[3] < '0') || (cur[3] > '9')) + return(1); + while ((*cur >= '0') && (*cur <= '9')) + cur++; + if (*cur != '-') + return(1); + cur++; + if ((*cur != '0') && (*cur != '1')) + return(1); + /* month 01..09: the second character must be a non-zero digit, never the terminator */ + if ((*cur == '0') && ((cur[1] < '1') || (cur[1] > '9'))) + return(1); + if ((*cur == '1') && ((cur[1] < '0') || (cur[1] > '2'))) + return(1); + cur += 2; + if (*cur != '-') +
return(1); + cur++; + if ((*cur < '0') || (*cur > '3')) + return(1); + if ((*cur == '0') && (cur[1] == '0')) + return(1); + if ((*cur == '3') && ((cur[1] < '0') || (cur[1] > '1'))) + return(1); + cur += 2; + if (*cur != 0) + return(1); + return(0); + } else if (type == xmlSchemaTypePositiveIntegerDef) { + const xmlChar *cur = value; + unsigned long base = 0; + int total = 0; + if (cur == NULL) + return(1); + if (*cur == '+') + cur++; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + total++; + cur++; + } + if (*cur != 0) + return(1); + if (val != NULL) { + v = xmlSchemaNewValue(XML_SCHEMAS_DECIMAL); + if (v != NULL) { + v->value.decimal.base = base; + v->value.decimal.sign = 0; + v->value.decimal.frac = 0; + v->value.decimal.total = total; + *val = v; + } + } + return(0); + } else if (type == xmlSchemaTypeNonNegativeIntegerDef) { + const xmlChar *cur = value; + unsigned long base = 0; + int total = 0; + int sign = 0; + if (cur == NULL) + return(1); + if (*cur == '-') { + sign = 1; + cur++; + } else if (*cur == '+') + cur++; + while ((*cur >= '0') && (*cur <= '9')) { + base = base * 10 + (*cur - '0'); + total++; + cur++; + } + if (*cur != 0) + return(1); + if ((sign == 1) && (base != 0)) + return(1); + if (val != NULL) { + v = xmlSchemaNewValue(XML_SCHEMAS_DECIMAL); + if (v != NULL) { + v->value.decimal.base = base; + v->value.decimal.sign = 0; + v->value.decimal.frac = 0; + v->value.decimal.total = total; + *val = v; + } + } + return(0); + } else { + TODO + return(0); + } +} + +/** + * xmlSchemaCompareDecimals: + * @x: a first decimal value + * @y: a second decimal value + * + * Compare 2 decimals + * + * Returns -1 if x < y, 0 if x == y, 1 if x > y and -2 in case of error + */ +static int +xmlSchemaCompareDecimals(xmlSchemaValPtr x, xmlSchemaValPtr y) +{ + xmlSchemaValPtr swp; + int order = 1; + unsigned long tmp; + + if ((x->value.decimal.sign) && (x->value.decimal.sign)) + order = -1; + else if (x->value.decimal.sign) + return (-1); 
+ else if (y->value.decimal.sign) + return (1); + if (x->value.decimal.frac == y->value.decimal.frac) { + if (x->value.decimal.base < y->value.decimal.base) + return (-1); + return (x->value.decimal.base > y->value.decimal.base); + } + if (y->value.decimal.frac > x->value.decimal.frac) { + swp = y; + y = x; + x = swp; + order = -order; + } + tmp = + x->value.decimal.base / powten[x->value.decimal.frac - + y->value.decimal.frac]; + if (tmp > y->value.decimal.base) + return (order); + if (tmp < y->value.decimal.base) + return (-order); + tmp = + y->value.decimal.base * powten[x->value.decimal.frac - + y->value.decimal.frac]; + if (x->value.decimal.base < tmp) + return (-order); + if (x->value.decimal.base == tmp) + return (0); + return (order); +} + +/** + * xmlSchemaCompareValues: + * @x: a first value + * @y: a second value + * + * Compare 2 values + * + * Returns -1 if x < y, 0 if x == y, 1 if x > y and -2 in case of error + */ +int +xmlSchemaCompareValues(xmlSchemaValPtr x, xmlSchemaValPtr y) { + if ((x == NULL) || (y == NULL)) + return(-2); + + switch (x->type) { + case XML_SCHEMAS_STRING: + TODO + case XML_SCHEMAS_DECIMAL: + if (y->type == XML_SCHEMAS_DECIMAL) + return(xmlSchemaCompareDecimals(x, y)); + else + return(-2); + default: + TODO + } +} + +/** + * xmlSchemaValidateFacet: + * @type: the type declaration + * @facet: the facet to check + * @value: the lexical repr of the value to validate + * @val: the precomputed value + * + * Check a value against a facet condition + * + * Returns 0 if the element is schemas valid, a positive error code + * number otherwise and -1 in case of internal or API error. 
+ */ +int +xmlSchemaValidateFacet(xmlSchemaTypePtr base, xmlSchemaFacetPtr facet, + const xmlChar *value, xmlSchemaValPtr val) +{ + int ret; + + switch (facet->type) { + case XML_SCHEMA_FACET_PATTERN: + ret = xmlRegexpExec(facet->regexp, value); + if (ret == 1) + return(0); + if (ret == 0) { + TODO /* error code */ + return(1); + } + return(ret); + case XML_SCHEMA_FACET_MAXEXCLUSIVE: + ret = xmlSchemaCompareValues(val, facet->val); + if (ret == -2) { + TODO /* error code */ + return(-1); + } + if (ret == -1) + return(0); + TODO /* error code */ + return(1); + default: + TODO + } + return(0); +} + +#endif /* LIBXML_SCHEMAS_ENABLED */ diff --git a/xmlunicode.c b/xmlunicode.c new file mode 100644 index 00000000..8baf8d18 --- /dev/null +++ b/xmlunicode.c @@ -0,0 +1,4290 @@ +/* + * xmlunicode.c: this module implements the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html + * using the genUnicode.py Python script. 
+ * + * Generation date: Tue Apr 16 17:28:05 2002 + * Sources: Blocks-4.txt UnicodeData-3.1.0.txt + * Daniel Veillard + */ + +#define IN_LIBXML +#include "libxml.h" + +#ifdef LIBXML_UNICODE_ENABLED + +#include +#include +#include + +/** + * xmlUCSIsAlphabeticPresentationForms: + * @code: UCS code point + * + * Check whether the character is part of AlphabeticPresentationForms UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsAlphabeticPresentationForms(int code) { + return((code >= 0xFB00) && (code <= 0xFB4F)); +} + +/** + * xmlUCSIsArabic: + * @code: UCS code point + * + * Check whether the character is part of Arabic UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabic(int code) { + return((code >= 0x0600) && (code <= 0x06FF)); +} + +/** + * xmlUCSIsArabicPresentationFormsA: + * @code: UCS code point + * + * Check whether the character is part of ArabicPresentationForms-A UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabicPresentationFormsA(int code) { + return((code >= 0xFB50) && (code <= 0xFDFF)); +} + +/** + * xmlUCSIsArabicPresentationFormsB: + * @code: UCS code point + * + * Check whether the character is part of ArabicPresentationForms-B UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArabicPresentationFormsB(int code) { + return((code >= 0xFE70) && (code <= 0xFEFE)); +} + +/** + * xmlUCSIsArmenian: + * @code: UCS code point + * + * Check whether the character is part of Armenian UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArmenian(int code) { + return((code >= 0x0530) && (code <= 0x058F)); +} + +/** + * xmlUCSIsArrows: + * @code: UCS code point + * + * Check whether the character is part of Arrows UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsArrows(int code) { + return((code >= 0x2190) && (code <= 0x21FF)); +} + +/** + * xmlUCSIsBasicLatin: + * @code: UCS code point + * + * Check whether the character is part of BasicLatin UCS Block + 
/**
 * xmlUCSIsBasicLatin:
 * @code: UCS code point
 *
 * Tests @code against the BasicLatin UCS block (0x0000-0x007F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsBasicLatin(int code) {
    if (code < 0x0000)
        return(0);
    return(code <= 0x007F);
}

/**
 * xmlUCSIsBengali:
 * @code: UCS code point
 *
 * Tests @code against the Bengali UCS block (0x0980-0x09FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsBengali(int code) {
    if (code < 0x0980)
        return(0);
    return(code <= 0x09FF);
}

/**
 * xmlUCSIsBlockElements:
 * @code: UCS code point
 *
 * Tests @code against the BlockElements UCS block (0x2580-0x259F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsBlockElements(int code) {
    if (code < 0x2580)
        return(0);
    return(code <= 0x259F);
}

/**
 * xmlUCSIsBopomofo:
 * @code: UCS code point
 *
 * Tests @code against the Bopomofo UCS block (0x3100-0x312F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsBopomofo(int code) {
    if (code < 0x3100)
        return(0);
    return(code <= 0x312F);
}

/**
 * xmlUCSIsBopomofoExtended:
 * @code: UCS code point
 *
 * Tests @code against the BopomofoExtended UCS block (0x31A0-0x31BF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsBopomofoExtended(int code) {
    if (code < 0x31A0)
        return(0);
    return(code <= 0x31BF);
}

/**
 * xmlUCSIsBoxDrawing:
 * @code: UCS code point
 *
 * Tests @code against the BoxDrawing UCS block (0x2500-0x257F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsBoxDrawing(int code) {
    if (code < 0x2500)
        return(0);
    return(code <= 0x257F);
}

/**
 * xmlUCSIsBraillePatterns:
 * @code: UCS code point
 *
 * Tests @code against the BraillePatterns UCS block (0x2800-0x28FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsBraillePatterns(int code) {
    if (code < 0x2800)
        return(0);
    return(code <= 0x28FF);
}

/**
 * xmlUCSIsByzantineMusicalSymbols:
 * @code: UCS code point
 *
 * Tests @code against the ByzantineMusicalSymbols UCS block
 * (0x1D000-0x1D0FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsByzantineMusicalSymbols(int code) {
    if (code < 0x1D000)
        return(0);
    return(code <= 0x1D0FF);
}

/**
 * xmlUCSIsCJKCompatibility:
 * @code: UCS code point
 *
 * Tests @code against the CJKCompatibility UCS block (0x3300-0x33FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKCompatibility(int code) {
    if (code < 0x3300)
        return(0);
    return(code <= 0x33FF);
}

/**
 * xmlUCSIsCJKCompatibilityForms:
 * @code: UCS code point
 *
 * Tests @code against the CJKCompatibilityForms UCS block (0xFE30-0xFE4F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKCompatibilityForms(int code) {
    if (code < 0xFE30)
        return(0);
    return(code <= 0xFE4F);
}

/**
 * xmlUCSIsCJKCompatibilityIdeographs:
 * @code: UCS code point
 *
 * Tests @code against the CJKCompatibilityIdeographs UCS block
 * (0xF900-0xFAFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKCompatibilityIdeographs(int code) {
    if (code < 0xF900)
        return(0);
    return(code <= 0xFAFF);
}

/**
 * xmlUCSIsCJKCompatibilityIdeographsSupplement:
 * @code: UCS code point
 *
 * Tests @code against the CJKCompatibilityIdeographsSupplement UCS block
 * (0x2F800-0x2FA1F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKCompatibilityIdeographsSupplement(int code) {
    if (code < 0x2F800)
        return(0);
    return(code <= 0x2FA1F);
}

/**
 * xmlUCSIsCJKRadicalsSupplement:
 * @code: UCS code point
 *
 * Tests @code against the CJKRadicalsSupplement UCS block (0x2E80-0x2EFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKRadicalsSupplement(int code) {
    if (code < 0x2E80)
        return(0);
    return(code <= 0x2EFF);
}

/**
 * xmlUCSIsCJKSymbolsandPunctuation:
 * @code: UCS code point
 *
 * Tests @code against the CJKSymbolsandPunctuation UCS block
 * (0x3000-0x303F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKSymbolsandPunctuation(int code) {
    if (code < 0x3000)
        return(0);
    return(code <= 0x303F);
}

/**
 * xmlUCSIsCJKUnifiedIdeographs:
 * @code: UCS code point
 *
 * Tests @code against the CJKUnifiedIdeographs UCS block (0x4E00-0x9FFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKUnifiedIdeographs(int code) {
    if (code < 0x4E00)
        return(0);
    return(code <= 0x9FFF);
}

/**
 * xmlUCSIsCJKUnifiedIdeographsExtensionA:
 * @code: UCS code point
 *
 * Tests @code against the CJKUnifiedIdeographsExtensionA UCS block
 * (0x3400-0x4DB5).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKUnifiedIdeographsExtensionA(int code) {
    if (code < 0x3400)
        return(0);
    return(code <= 0x4DB5);
}

/**
 * xmlUCSIsCJKUnifiedIdeographsExtensionB:
 * @code: UCS code point
 *
 * Tests @code against the CJKUnifiedIdeographsExtensionB UCS block
 * (0x20000-0x2A6D6).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCJKUnifiedIdeographsExtensionB(int code) {
    if (code < 0x20000)
        return(0);
    return(code <= 0x2A6D6);
}

/**
 * xmlUCSIsCherokee:
 * @code: UCS code point
 *
 * Tests @code against the Cherokee UCS block (0x13A0-0x13FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCherokee(int code) {
    if (code < 0x13A0)
        return(0);
    return(code <= 0x13FF);
}

/**
 * xmlUCSIsCombiningDiacriticalMarks:
 * @code: UCS code point
 *
 * Tests @code against the CombiningDiacriticalMarks UCS block
 * (0x0300-0x036F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCombiningDiacriticalMarks(int code) {
    if (code < 0x0300)
        return(0);
    return(code <= 0x036F);
}

/**
 * xmlUCSIsCombiningHalfMarks:
 * @code: UCS code point
 *
 * Tests @code against the CombiningHalfMarks UCS block (0xFE20-0xFE2F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCombiningHalfMarks(int code) {
    if (code < 0xFE20)
        return(0);
    return(code <= 0xFE2F);
}

/**
 * xmlUCSIsCombiningMarksforSymbols:
 * @code: UCS code point
 *
 * Tests @code against the CombiningMarksforSymbols UCS block
 * (0x20D0-0x20FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCombiningMarksforSymbols(int code) {
    if (code < 0x20D0)
        return(0);
    return(code <= 0x20FF);
}

/**
 * xmlUCSIsControlPictures:
 * @code: UCS code point
 *
 * Tests @code against the ControlPictures UCS block (0x2400-0x243F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsControlPictures(int code) {
    if (code < 0x2400)
        return(0);
    return(code <= 0x243F);
}

/**
 * xmlUCSIsCurrencySymbols:
 * @code: UCS code point
 *
 * Tests @code against the CurrencySymbols UCS block (0x20A0-0x20CF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCurrencySymbols(int code) {
    if (code < 0x20A0)
        return(0);
    return(code <= 0x20CF);
}

/**
 * xmlUCSIsCyrillic:
 * @code: UCS code point
 *
 * Tests @code against the Cyrillic UCS block (0x0400-0x04FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsCyrillic(int code) {
    if (code < 0x0400)
        return(0);
    return(code <= 0x04FF);
}

/**
 * xmlUCSIsDeseret:
 * @code: UCS code point
 *
 * Tests @code against the Deseret UCS block (0x10400-0x1044F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsDeseret(int code) {
    if (code < 0x10400)
        return(0);
    return(code <= 0x1044F);
}

/**
 * xmlUCSIsDevanagari:
 * @code: UCS code point
 *
 * Tests @code against the Devanagari UCS block (0x0900-0x097F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsDevanagari(int code) {
    if (code < 0x0900)
        return(0);
    return(code <= 0x097F);
}

/**
 * xmlUCSIsDingbats:
 * @code: UCS code point
 *
 * Tests @code against the Dingbats UCS block (0x2700-0x27BF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsDingbats(int code) {
    if (code < 0x2700)
        return(0);
    return(code <= 0x27BF);
}

/**
 * xmlUCSIsEnclosedAlphanumerics:
 * @code: UCS code point
 *
 * Tests @code against the EnclosedAlphanumerics UCS block (0x2460-0x24FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsEnclosedAlphanumerics(int code) {
    if (code < 0x2460)
        return(0);
    return(code <= 0x24FF);
}

/**
 * xmlUCSIsEnclosedCJKLettersandMonths:
 * @code: UCS code point
 *
 * Tests @code against the EnclosedCJKLettersandMonths UCS block
 * (0x3200-0x32FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsEnclosedCJKLettersandMonths(int code) {
    if (code < 0x3200)
        return(0);
    return(code <= 0x32FF);
}
/**
 * xmlUCSIsEthiopic:
 * @code: UCS code point
 *
 * Tests @code against the Ethiopic UCS block (0x1200-0x137F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsEthiopic(int code) {
    if (code < 0x1200)
        return(0);
    return(code <= 0x137F);
}

/**
 * xmlUCSIsGeneralPunctuation:
 * @code: UCS code point
 *
 * Tests @code against the GeneralPunctuation UCS block (0x2000-0x206F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsGeneralPunctuation(int code) {
    if (code < 0x2000)
        return(0);
    return(code <= 0x206F);
}

/**
 * xmlUCSIsGeometricShapes:
 * @code: UCS code point
 *
 * Tests @code against the GeometricShapes UCS block (0x25A0-0x25FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsGeometricShapes(int code) {
    if (code < 0x25A0)
        return(0);
    return(code <= 0x25FF);
}

/**
 * xmlUCSIsGeorgian:
 * @code: UCS code point
 *
 * Tests @code against the Georgian UCS block (0x10A0-0x10FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsGeorgian(int code) {
    if (code < 0x10A0)
        return(0);
    return(code <= 0x10FF);
}

/**
 * xmlUCSIsGothic:
 * @code: UCS code point
 *
 * Tests @code against the Gothic UCS block (0x10330-0x1034F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsGothic(int code) {
    if (code < 0x10330)
        return(0);
    return(code <= 0x1034F);
}

/**
 * xmlUCSIsGreek:
 * @code: UCS code point
 *
 * Tests @code against the Greek UCS block (0x0370-0x03FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsGreek(int code) {
    if (code < 0x0370)
        return(0);
    return(code <= 0x03FF);
}

/**
 * xmlUCSIsGreekExtended:
 * @code: UCS code point
 *
 * Tests @code against the GreekExtended UCS block (0x1F00-0x1FFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsGreekExtended(int code) {
    if (code < 0x1F00)
        return(0);
    return(code <= 0x1FFF);
}

/**
 * xmlUCSIsGujarati:
 * @code: UCS code point
 *
 * Tests @code against the Gujarati UCS block (0x0A80-0x0AFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsGujarati(int code) {
    if (code < 0x0A80)
        return(0);
    return(code <= 0x0AFF);
}

/**
 * xmlUCSIsGurmukhi:
 * @code: UCS code point
 *
 * Tests @code against the Gurmukhi UCS block (0x0A00-0x0A7F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsGurmukhi(int code) {
    if (code < 0x0A00)
        return(0);
    return(code <= 0x0A7F);
}

/**
 * xmlUCSIsHalfwidthandFullwidthForms:
 * @code: UCS code point
 *
 * Tests @code against the HalfwidthandFullwidthForms UCS block
 * (0xFF00-0xFFEF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsHalfwidthandFullwidthForms(int code) {
    if (code < 0xFF00)
        return(0);
    return(code <= 0xFFEF);
}

/**
 * xmlUCSIsHangulCompatibilityJamo:
 * @code: UCS code point
 *
 * Tests @code against the HangulCompatibilityJamo UCS block
 * (0x3130-0x318F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsHangulCompatibilityJamo(int code) {
    if (code < 0x3130)
        return(0);
    return(code <= 0x318F);
}

/**
 * xmlUCSIsHangulJamo:
 * @code: UCS code point
 *
 * Tests @code against the HangulJamo UCS block (0x1100-0x11FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsHangulJamo(int code) {
    if (code < 0x1100)
        return(0);
    return(code <= 0x11FF);
}

/**
 * xmlUCSIsHangulSyllables:
 * @code: UCS code point
 *
 * Tests @code against the HangulSyllables UCS block (0xAC00-0xD7A3).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsHangulSyllables(int code) {
    if (code < 0xAC00)
        return(0);
    return(code <= 0xD7A3);
}

/**
 * xmlUCSIsHebrew:
 * @code: UCS code point
 *
 * Tests @code against the Hebrew UCS block (0x0590-0x05FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsHebrew(int code) {
    if (code < 0x0590)
        return(0);
    return(code <= 0x05FF);
}

/**
 * xmlUCSIsHighPrivateUseSurrogates:
 * @code: UCS code point
 *
 * Tests @code against the HighPrivateUseSurrogates UCS block
 * (0xDB80-0xDBFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsHighPrivateUseSurrogates(int code) {
    if (code < 0xDB80)
        return(0);
    return(code <= 0xDBFF);
}

/**
 * xmlUCSIsHighSurrogates:
 * @code: UCS code point
 *
 * Tests @code against the HighSurrogates UCS block (0xD800-0xDB7F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsHighSurrogates(int code) {
    if (code < 0xD800)
        return(0);
    return(code <= 0xDB7F);
}

/**
 * xmlUCSIsHiragana:
 * @code: UCS code point
 *
 * Tests @code against the Hiragana UCS block (0x3040-0x309F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsHiragana(int code) {
    if (code < 0x3040)
        return(0);
    return(code <= 0x309F);
}

/**
 * xmlUCSIsIPAExtensions:
 * @code: UCS code point
 *
 * Tests @code against the IPAExtensions UCS block (0x0250-0x02AF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsIPAExtensions(int code) {
    if (code < 0x0250)
        return(0);
    return(code <= 0x02AF);
}

/**
 * xmlUCSIsIdeographicDescriptionCharacters:
 * @code: UCS code point
 *
 * Tests @code against the IdeographicDescriptionCharacters UCS block
 * (0x2FF0-0x2FFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsIdeographicDescriptionCharacters(int code) {
    if (code < 0x2FF0)
        return(0);
    return(code <= 0x2FFF);
}

/**
 * xmlUCSIsKanbun:
 * @code: UCS code point
 *
 * Tests @code against the Kanbun UCS block (0x3190-0x319F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsKanbun(int code) {
    if (code < 0x3190)
        return(0);
    return(code <= 0x319F);
}

/**
 * xmlUCSIsKangxiRadicals:
 * @code: UCS code point
 *
 * Tests @code against the KangxiRadicals UCS block (0x2F00-0x2FDF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsKangxiRadicals(int code) {
    if (code < 0x2F00)
        return(0);
    return(code <= 0x2FDF);
}

/**
 * xmlUCSIsKannada:
 * @code: UCS code point
 *
 * Tests @code against the Kannada UCS block (0x0C80-0x0CFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsKannada(int code) {
    if (code < 0x0C80)
        return(0);
    return(code <= 0x0CFF);
}

/**
 * xmlUCSIsKatakana:
 * @code: UCS code point
 *
 * Tests @code against the Katakana UCS block (0x30A0-0x30FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsKatakana(int code) {
    if (code < 0x30A0)
        return(0);
    return(code <= 0x30FF);
}

/**
 * xmlUCSIsKhmer:
 * @code: UCS code point
 *
 * Tests @code against the Khmer UCS block (0x1780-0x17FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsKhmer(int code) {
    if (code < 0x1780)
        return(0);
    return(code <= 0x17FF);
}

/**
 * xmlUCSIsLao:
 * @code: UCS code point
 *
 * Tests @code against the Lao UCS block (0x0E80-0x0EFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsLao(int code) {
    if (code < 0x0E80)
        return(0);
    return(code <= 0x0EFF);
}

/**
 * xmlUCSIsLatin1Supplement:
 * @code: UCS code point
 *
 * Tests @code against the Latin-1Supplement UCS block (0x0080-0x00FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsLatin1Supplement(int code) {
    if (code < 0x0080)
        return(0);
    return(code <= 0x00FF);
}

/**
 * xmlUCSIsLatinExtendedA:
 * @code: UCS code point
 *
 * Tests @code against the LatinExtended-A UCS block (0x0100-0x017F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsLatinExtendedA(int code) {
    if (code < 0x0100)
        return(0);
    return(code <= 0x017F);
}

/**
 * xmlUCSIsLatinExtendedB:
 * @code: UCS code point
 *
 * Tests @code against the LatinExtended-B UCS block (0x0180-0x024F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsLatinExtendedB(int code) {
    if (code < 0x0180)
        return(0);
    return(code <= 0x024F);
}

/**
 * xmlUCSIsLatinExtendedAdditional:
 * @code: UCS code point
 *
 * Tests @code against the LatinExtendedAdditional UCS block
 * (0x1E00-0x1EFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsLatinExtendedAdditional(int code) {
    if (code < 0x1E00)
        return(0);
    return(code <= 0x1EFF);
}

/**
 * xmlUCSIsLetterlikeSymbols:
 * @code: UCS code point
 *
 * Tests @code against the LetterlikeSymbols UCS block (0x2100-0x214F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsLetterlikeSymbols(int code) {
    if (code < 0x2100)
        return(0);
    return(code <= 0x214F);
}

/**
 * xmlUCSIsLowSurrogates:
 * @code: UCS code point
 *
 * Tests @code against the LowSurrogates UCS block (0xDC00-0xDFFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsLowSurrogates(int code) {
    if (code < 0xDC00)
        return(0);
    return(code <= 0xDFFF);
}

/**
 * xmlUCSIsMalayalam:
 * @code: UCS code point
 *
 * Tests @code against the Malayalam UCS block (0x0D00-0x0D7F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsMalayalam(int code) {
    if (code < 0x0D00)
        return(0);
    return(code <= 0x0D7F);
}

/**
 * xmlUCSIsMathematicalAlphanumericSymbols:
 * @code: UCS code point
 *
 * Tests @code against the MathematicalAlphanumericSymbols UCS block
 * (0x1D400-0x1D7FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsMathematicalAlphanumericSymbols(int code) {
    if (code < 0x1D400)
        return(0);
    return(code <= 0x1D7FF);
}

/**
 * xmlUCSIsMathematicalOperators:
 * @code: UCS code point
 *
 * Tests @code against the MathematicalOperators UCS block (0x2200-0x22FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsMathematicalOperators(int code) {
    if (code < 0x2200)
        return(0);
    return(code <= 0x22FF);
}

/**
 * xmlUCSIsMiscellaneousSymbols:
 * @code: UCS code point
 *
 * Tests @code against the MiscellaneousSymbols UCS block (0x2600-0x26FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsMiscellaneousSymbols(int code) {
    if (code < 0x2600)
        return(0);
    return(code <= 0x26FF);
}

/**
 * xmlUCSIsMiscellaneousTechnical:
 * @code: UCS code point
 *
 * Tests @code against the MiscellaneousTechnical UCS block (0x2300-0x23FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsMiscellaneousTechnical(int code) {
    if (code < 0x2300)
        return(0);
    return(code <= 0x23FF);
}

/**
 * xmlUCSIsMongolian:
 * @code: UCS code point
 *
 * Tests @code against the Mongolian UCS block (0x1800-0x18AF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsMongolian(int code) {
    if (code < 0x1800)
        return(0);
    return(code <= 0x18AF);
}

/**
 * xmlUCSIsMusicalSymbols:
 * @code: UCS code point
 *
 * Tests @code against the MusicalSymbols UCS block (0x1D100-0x1D1FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsMusicalSymbols(int code) {
    if (code < 0x1D100)
        return(0);
    return(code <= 0x1D1FF);
}

/**
 * xmlUCSIsMyanmar:
 * @code: UCS code point
 *
 * Tests @code against the Myanmar UCS block (0x1000-0x109F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsMyanmar(int code) {
    if (code < 0x1000)
        return(0);
    return(code <= 0x109F);
}

/**
 * xmlUCSIsNumberForms:
 * @code: UCS code point
 *
 * Tests @code against the NumberForms UCS block (0x2150-0x218F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsNumberForms(int code) {
    if (code < 0x2150)
        return(0);
    return(code <= 0x218F);
}

/**
 * xmlUCSIsOgham:
 * @code: UCS code point
 *
 * Tests @code against the Ogham UCS block (0x1680-0x169F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsOgham(int code) {
    if (code < 0x1680)
        return(0);
    return(code <= 0x169F);
}

/**
 * xmlUCSIsOldItalic:
 * @code: UCS code point
 *
 * Tests @code against the OldItalic UCS block (0x10300-0x1032F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsOldItalic(int code) {
    if (code < 0x10300)
        return(0);
    return(code <= 0x1032F);
}

/**
 * xmlUCSIsOpticalCharacterRecognition:
 * @code: UCS code point
 *
 * Tests @code against the OpticalCharacterRecognition UCS block
 * (0x2440-0x245F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsOpticalCharacterRecognition(int code) {
    if (code < 0x2440)
        return(0);
    return(code <= 0x245F);
}

/**
 * xmlUCSIsOriya:
 * @code: UCS code point
 *
 * Tests @code against the Oriya UCS block (0x0B00-0x0B7F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsOriya(int code) {
    if (code < 0x0B00)
        return(0);
    return(code <= 0x0B7F);
}

/**
 * xmlUCSIsPrivateUse:
 * @code: UCS code point
 *
 * Tests @code against the PrivateUse UCS block.  The block name is
 * attached to three separate ranges, all of which are accepted here:
 * 0xE000-0xF8FF, 0xF0000-0xFFFFD and 0x100000-0x10FFFD.  Testing only
 * the last range would reject the private use area of the BMP.
 *
 * Returns 1 when @code is inside one of the ranges, 0 otherwise
 */
int
xmlUCSIsPrivateUse(int code) {
    return(((code >= 0xE000) && (code <= 0xF8FF)) ||
           ((code >= 0xF0000) && (code <= 0xFFFFD)) ||
           ((code >= 0x100000) && (code <= 0x10FFFD)));
}

/**
 * xmlUCSIsRunic:
 * @code: UCS code point
 *
 * Tests @code against the Runic UCS block (0x16A0-0x16FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsRunic(int code) {
    if (code < 0x16A0)
        return(0);
    return(code <= 0x16FF);
}

/**
 * xmlUCSIsSinhala:
 * @code: UCS code point
 *
 * Tests @code against the Sinhala UCS block (0x0D80-0x0DFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsSinhala(int code) {
    if (code < 0x0D80)
        return(0);
    return(code <= 0x0DFF);
}

/**
 * xmlUCSIsSmallFormVariants:
 * @code: UCS code point
 *
 * Tests @code against the SmallFormVariants UCS block (0xFE50-0xFE6F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsSmallFormVariants(int code) {
    if (code < 0xFE50)
        return(0);
    return(code <= 0xFE6F);
}

/**
 * xmlUCSIsSpacingModifierLetters:
 * @code: UCS code point
 *
 * Tests @code against the SpacingModifierLetters UCS block (0x02B0-0x02FF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsSpacingModifierLetters(int code) {
    if (code < 0x02B0)
        return(0);
    return(code <= 0x02FF);
}

/**
 * xmlUCSIsSpecials:
 * @code: UCS code point
 *
 * Tests @code against the Specials UCS block.  The block name is
 * attached to two separate ranges, both of which are accepted here:
 * the single code point 0xFEFF and 0xFFF0-0xFFFD.
 *
 * Returns 1 when @code is inside one of the ranges, 0 otherwise
 */
int
xmlUCSIsSpecials(int code) {
    return((code == 0xFEFF) ||
           ((code >= 0xFFF0) && (code <= 0xFFFD)));
}

/**
 * xmlUCSIsSuperscriptsandSubscripts:
 * @code: UCS code point
 *
 * Tests @code against the SuperscriptsandSubscripts UCS block
 * (0x2070-0x209F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsSuperscriptsandSubscripts(int code) {
    if (code < 0x2070)
        return(0);
    return(code <= 0x209F);
}

/**
 * xmlUCSIsSyriac:
 * @code: UCS code point
 *
 * Tests @code against the Syriac UCS block (0x0700-0x074F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsSyriac(int code) {
    if (code < 0x0700)
        return(0);
    return(code <= 0x074F);
}

/**
 * xmlUCSIsTags:
 * @code: UCS code point
 *
 * Tests @code against the Tags UCS block (0xE0000-0xE007F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsTags(int code) {
    if (code < 0xE0000)
        return(0);
    return(code <= 0xE007F);
}

/**
 * xmlUCSIsTamil:
 * @code: UCS code point
 *
 * Tests @code against the Tamil UCS block (0x0B80-0x0BFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsTamil(int code) {
    if (code < 0x0B80)
        return(0);
    return(code <= 0x0BFF);
}

/**
 * xmlUCSIsTelugu:
 * @code: UCS code point
 *
 * Tests @code against the Telugu UCS block (0x0C00-0x0C7F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsTelugu(int code) {
    if (code < 0x0C00)
        return(0);
    return(code <= 0x0C7F);
}

/**
 * xmlUCSIsThaana:
 * @code: UCS code point
 *
 * Tests @code against the Thaana UCS block (0x0780-0x07BF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsThaana(int code) {
    if (code < 0x0780)
        return(0);
    return(code <= 0x07BF);
}

/**
 * xmlUCSIsThai:
 * @code: UCS code point
 *
 * Tests @code against the Thai UCS block (0x0E00-0x0E7F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsThai(int code) {
    if (code < 0x0E00)
        return(0);
    return(code <= 0x0E7F);
}

/**
 * xmlUCSIsTibetan:
 * @code: UCS code point
 *
 * Tests @code against the Tibetan UCS block (0x0F00-0x0FFF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsTibetan(int code) {
    if (code < 0x0F00)
        return(0);
    return(code <= 0x0FFF);
}

/**
 * xmlUCSIsUnifiedCanadianAboriginalSyllabics:
 * @code: UCS code point
 *
 * Tests @code against the UnifiedCanadianAboriginalSyllabics UCS block
 * (0x1400-0x167F).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsUnifiedCanadianAboriginalSyllabics(int code) {
    if (code < 0x1400)
        return(0);
    return(code <= 0x167F);
}

/**
 * xmlUCSIsYiRadicals:
 * @code: UCS code point
 *
 * Tests @code against the YiRadicals UCS block (0xA490-0xA4CF).
 *
 * Returns 1 when @code is inside the block, 0 otherwise
 */
int
xmlUCSIsYiRadicals(int code) {
    if (code < 0xA490)
        return(0);
    return(code <= 0xA4CF);
}
character is part of YiSyllables UCS Block + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsYiSyllables(int code) { + return((code >= 0xA000) && (code <= 0xA48F)); +} + +/** + * xmlUCSIsBlock: + * @code: UCS code point + * @block: UCS block name + * + * Check whether the caracter is part of the UCS Block + * + * Returns 1 if true, 0 if false and -1 on unknown block + */ +int +xmlUCSIsBlock(int code, const char *block) { + if (!strcmp(block, "AlphabeticPresentationForms")) + return(xmlUCSIsAlphabeticPresentationForms(code)); + if (!strcmp(block, "Arabic")) + return(xmlUCSIsArabic(code)); + if (!strcmp(block, "ArabicPresentationForms-A")) + return(xmlUCSIsArabicPresentationFormsA(code)); + if (!strcmp(block, "ArabicPresentationForms-B")) + return(xmlUCSIsArabicPresentationFormsB(code)); + if (!strcmp(block, "Armenian")) + return(xmlUCSIsArmenian(code)); + if (!strcmp(block, "Arrows")) + return(xmlUCSIsArrows(code)); + if (!strcmp(block, "BasicLatin")) + return(xmlUCSIsBasicLatin(code)); + if (!strcmp(block, "Bengali")) + return(xmlUCSIsBengali(code)); + if (!strcmp(block, "BlockElements")) + return(xmlUCSIsBlockElements(code)); + if (!strcmp(block, "Bopomofo")) + return(xmlUCSIsBopomofo(code)); + if (!strcmp(block, "BopomofoExtended")) + return(xmlUCSIsBopomofoExtended(code)); + if (!strcmp(block, "BoxDrawing")) + return(xmlUCSIsBoxDrawing(code)); + if (!strcmp(block, "BraillePatterns")) + return(xmlUCSIsBraillePatterns(code)); + if (!strcmp(block, "ByzantineMusicalSymbols")) + return(xmlUCSIsByzantineMusicalSymbols(code)); + if (!strcmp(block, "CJKCompatibility")) + return(xmlUCSIsCJKCompatibility(code)); + if (!strcmp(block, "CJKCompatibilityForms")) + return(xmlUCSIsCJKCompatibilityForms(code)); + if (!strcmp(block, "CJKCompatibilityIdeographs")) + return(xmlUCSIsCJKCompatibilityIdeographs(code)); + if (!strcmp(block, "CJKCompatibilityIdeographsSupplement")) + return(xmlUCSIsCJKCompatibilityIdeographsSupplement(code)); + if (!strcmp(block, 
"CJKRadicalsSupplement")) + return(xmlUCSIsCJKRadicalsSupplement(code)); + if (!strcmp(block, "CJKSymbolsandPunctuation")) + return(xmlUCSIsCJKSymbolsandPunctuation(code)); + if (!strcmp(block, "CJKUnifiedIdeographs")) + return(xmlUCSIsCJKUnifiedIdeographs(code)); + if (!strcmp(block, "CJKUnifiedIdeographsExtensionA")) + return(xmlUCSIsCJKUnifiedIdeographsExtensionA(code)); + if (!strcmp(block, "CJKUnifiedIdeographsExtensionB")) + return(xmlUCSIsCJKUnifiedIdeographsExtensionB(code)); + if (!strcmp(block, "Cherokee")) + return(xmlUCSIsCherokee(code)); + if (!strcmp(block, "CombiningDiacriticalMarks")) + return(xmlUCSIsCombiningDiacriticalMarks(code)); + if (!strcmp(block, "CombiningHalfMarks")) + return(xmlUCSIsCombiningHalfMarks(code)); + if (!strcmp(block, "CombiningMarksforSymbols")) + return(xmlUCSIsCombiningMarksforSymbols(code)); + if (!strcmp(block, "ControlPictures")) + return(xmlUCSIsControlPictures(code)); + if (!strcmp(block, "CurrencySymbols")) + return(xmlUCSIsCurrencySymbols(code)); + if (!strcmp(block, "Cyrillic")) + return(xmlUCSIsCyrillic(code)); + if (!strcmp(block, "Deseret")) + return(xmlUCSIsDeseret(code)); + if (!strcmp(block, "Devanagari")) + return(xmlUCSIsDevanagari(code)); + if (!strcmp(block, "Dingbats")) + return(xmlUCSIsDingbats(code)); + if (!strcmp(block, "EnclosedAlphanumerics")) + return(xmlUCSIsEnclosedAlphanumerics(code)); + if (!strcmp(block, "EnclosedCJKLettersandMonths")) + return(xmlUCSIsEnclosedCJKLettersandMonths(code)); + if (!strcmp(block, "Ethiopic")) + return(xmlUCSIsEthiopic(code)); + if (!strcmp(block, "GeneralPunctuation")) + return(xmlUCSIsGeneralPunctuation(code)); + if (!strcmp(block, "GeometricShapes")) + return(xmlUCSIsGeometricShapes(code)); + if (!strcmp(block, "Georgian")) + return(xmlUCSIsGeorgian(code)); + if (!strcmp(block, "Gothic")) + return(xmlUCSIsGothic(code)); + if (!strcmp(block, "Greek")) + return(xmlUCSIsGreek(code)); + if (!strcmp(block, "GreekExtended")) + return(xmlUCSIsGreekExtended(code)); + if 
(!strcmp(block, "Gujarati")) + return(xmlUCSIsGujarati(code)); + if (!strcmp(block, "Gurmukhi")) + return(xmlUCSIsGurmukhi(code)); + if (!strcmp(block, "HalfwidthandFullwidthForms")) + return(xmlUCSIsHalfwidthandFullwidthForms(code)); + if (!strcmp(block, "HangulCompatibilityJamo")) + return(xmlUCSIsHangulCompatibilityJamo(code)); + if (!strcmp(block, "HangulJamo")) + return(xmlUCSIsHangulJamo(code)); + if (!strcmp(block, "HangulSyllables")) + return(xmlUCSIsHangulSyllables(code)); + if (!strcmp(block, "Hebrew")) + return(xmlUCSIsHebrew(code)); + if (!strcmp(block, "HighPrivateUseSurrogates")) + return(xmlUCSIsHighPrivateUseSurrogates(code)); + if (!strcmp(block, "HighSurrogates")) + return(xmlUCSIsHighSurrogates(code)); + if (!strcmp(block, "Hiragana")) + return(xmlUCSIsHiragana(code)); + if (!strcmp(block, "IPAExtensions")) + return(xmlUCSIsIPAExtensions(code)); + if (!strcmp(block, "IdeographicDescriptionCharacters")) + return(xmlUCSIsIdeographicDescriptionCharacters(code)); + if (!strcmp(block, "Kanbun")) + return(xmlUCSIsKanbun(code)); + if (!strcmp(block, "KangxiRadicals")) + return(xmlUCSIsKangxiRadicals(code)); + if (!strcmp(block, "Kannada")) + return(xmlUCSIsKannada(code)); + if (!strcmp(block, "Katakana")) + return(xmlUCSIsKatakana(code)); + if (!strcmp(block, "Khmer")) + return(xmlUCSIsKhmer(code)); + if (!strcmp(block, "Lao")) + return(xmlUCSIsLao(code)); + if (!strcmp(block, "Latin-1Supplement")) + return(xmlUCSIsLatin1Supplement(code)); + if (!strcmp(block, "LatinExtended-A")) + return(xmlUCSIsLatinExtendedA(code)); + if (!strcmp(block, "LatinExtended-B")) + return(xmlUCSIsLatinExtendedB(code)); + if (!strcmp(block, "LatinExtendedAdditional")) + return(xmlUCSIsLatinExtendedAdditional(code)); + if (!strcmp(block, "LetterlikeSymbols")) + return(xmlUCSIsLetterlikeSymbols(code)); + if (!strcmp(block, "LowSurrogates")) + return(xmlUCSIsLowSurrogates(code)); + if (!strcmp(block, "Malayalam")) + return(xmlUCSIsMalayalam(code)); + if (!strcmp(block, 
"MathematicalAlphanumericSymbols")) + return(xmlUCSIsMathematicalAlphanumericSymbols(code)); + if (!strcmp(block, "MathematicalOperators")) + return(xmlUCSIsMathematicalOperators(code)); + if (!strcmp(block, "MiscellaneousSymbols")) + return(xmlUCSIsMiscellaneousSymbols(code)); + if (!strcmp(block, "MiscellaneousTechnical")) + return(xmlUCSIsMiscellaneousTechnical(code)); + if (!strcmp(block, "Mongolian")) + return(xmlUCSIsMongolian(code)); + if (!strcmp(block, "MusicalSymbols")) + return(xmlUCSIsMusicalSymbols(code)); + if (!strcmp(block, "Myanmar")) + return(xmlUCSIsMyanmar(code)); + if (!strcmp(block, "NumberForms")) + return(xmlUCSIsNumberForms(code)); + if (!strcmp(block, "Ogham")) + return(xmlUCSIsOgham(code)); + if (!strcmp(block, "OldItalic")) + return(xmlUCSIsOldItalic(code)); + if (!strcmp(block, "OpticalCharacterRecognition")) + return(xmlUCSIsOpticalCharacterRecognition(code)); + if (!strcmp(block, "Oriya")) + return(xmlUCSIsOriya(code)); + if (!strcmp(block, "PrivateUse")) + return(xmlUCSIsPrivateUse(code)); + if (!strcmp(block, "Runic")) + return(xmlUCSIsRunic(code)); + if (!strcmp(block, "Sinhala")) + return(xmlUCSIsSinhala(code)); + if (!strcmp(block, "SmallFormVariants")) + return(xmlUCSIsSmallFormVariants(code)); + if (!strcmp(block, "SpacingModifierLetters")) + return(xmlUCSIsSpacingModifierLetters(code)); + if (!strcmp(block, "Specials")) + return(xmlUCSIsSpecials(code)); + if (!strcmp(block, "SuperscriptsandSubscripts")) + return(xmlUCSIsSuperscriptsandSubscripts(code)); + if (!strcmp(block, "Syriac")) + return(xmlUCSIsSyriac(code)); + if (!strcmp(block, "Tags")) + return(xmlUCSIsTags(code)); + if (!strcmp(block, "Tamil")) + return(xmlUCSIsTamil(code)); + if (!strcmp(block, "Telugu")) + return(xmlUCSIsTelugu(code)); + if (!strcmp(block, "Thaana")) + return(xmlUCSIsThaana(code)); + if (!strcmp(block, "Thai")) + return(xmlUCSIsThai(code)); + if (!strcmp(block, "Tibetan")) + return(xmlUCSIsTibetan(code)); + if (!strcmp(block, 
"UnifiedCanadianAboriginalSyllabics")) + return(xmlUCSIsUnifiedCanadianAboriginalSyllabics(code)); + if (!strcmp(block, "YiRadicals")) + return(xmlUCSIsYiRadicals(code)); + if (!strcmp(block, "YiSyllables")) + return(xmlUCSIsYiSyllables(code)); + return(-1); +} + +/** + * xmlUCSIsCatC: + * @code: UCS code point + * + * Check whether the character is part of C UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatC(int code) { + return(((code >= 0x0) && (code <= 0x1f)) || + ((code >= 0x7f) && (code <= 0x9f)) || + (code == 0x70f) || + ((code >= 0x180b) && (code <= 0x180e)) || + ((code >= 0x200c) && (code <= 0x200f)) || + ((code >= 0x202a) && (code <= 0x202e)) || + ((code >= 0x206a) && (code <= 0x206f)) || + (code == 0xd800) || + ((code >= 0xdb7f) && (code <= 0xdb80)) || + ((code >= 0xdbff) && (code <= 0xdc00)) || + ((code >= 0xdfff) && (code <= 0xe000)) || + (code == 0xf8ff) || + (code == 0xfeff) || + ((code >= 0xfff9) && (code <= 0xfffb)) || + ((code >= 0x1d173) && (code <= 0x1d17a)) || + (code == 0xe0001) || + ((code >= 0xe0020) && (code <= 0xe007f)) || + (code == 0xf0000) || + (code == 0xffffd) || + (code == 0x100000) || + (code == 0x10fffd)); +} + +/** + * xmlUCSIsCatCc: + * @code: UCS code point + * + * Check whether the character is part of Cc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCc(int code) { + return(((code >= 0x0) && (code <= 0x1f)) || + ((code >= 0x7f) && (code <= 0x9f))); +} + +/** + * xmlUCSIsCatCf: + * @code: UCS code point + * + * Check whether the character is part of Cf UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCf(int code) { + return((code == 0x70f) || + ((code >= 0x180b) && (code <= 0x180e)) || + ((code >= 0x200c) && (code <= 0x200f)) || + ((code >= 0x202a) && (code <= 0x202e)) || + ((code >= 0x206a) && (code <= 0x206f)) || + (code == 0xfeff) || + ((code >= 0xfff9) && (code <= 0xfffb)) || + ((code >= 0x1d173) && (code <= 0x1d17a)) || + (code == 0xe0001) || + 
((code >= 0xe0020) && (code <= 0xe007f))); +} + +/** + * xmlUCSIsCatCo: + * @code: UCS code point + * + * Check whether the character is part of Co UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCo(int code) { + return((code == 0xe000) || + (code == 0xf8ff) || + (code == 0xf0000) || + (code == 0xffffd) || + (code == 0x100000) || + (code == 0x10fffd)); +} + +/** + * xmlUCSIsCatCs: + * @code: UCS code point + * + * Check whether the character is part of Cs UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatCs(int code) { + return((code == 0xd800) || + ((code >= 0xdb7f) && (code <= 0xdb80)) || + ((code >= 0xdbff) && (code <= 0xdc00)) || + (code == 0xdfff)); +} + +/** + * xmlUCSIsCatL: + * @code: UCS code point + * + * Check whether the character is part of L UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatL(int code) { + return(((code >= 0x41) && (code <= 0x5a)) || + ((code >= 0x61) && (code <= 0x7a)) || + (code == 0xaa) || + (code == 0xb5) || + (code == 0xba) || + ((code >= 0xc0) && (code <= 0xd6)) || + ((code >= 0xd8) && (code <= 0xf6)) || + ((code >= 0xf8) && (code <= 0x21f)) || + ((code >= 0x222) && (code <= 0x233)) || + ((code >= 0x250) && (code <= 0x2ad)) || + ((code >= 0x2b0) && (code <= 0x2b8)) || + ((code >= 0x2bb) && (code <= 0x2c1)) || + ((code >= 0x2d0) && (code <= 0x2d1)) || + ((code >= 0x2e0) && (code <= 0x2e4)) || + (code == 0x2ee) || + (code == 0x37a) || + (code == 0x386) || + ((code >= 0x388) && (code <= 0x38a)) || + (code == 0x38c) || + ((code >= 0x38e) && (code <= 0x3a1)) || + ((code >= 0x3a3) && (code <= 0x3ce)) || + ((code >= 0x3d0) && (code <= 0x3d7)) || + ((code >= 0x3da) && (code <= 0x3f5)) || + ((code >= 0x400) && (code <= 0x481)) || + ((code >= 0x48c) && (code <= 0x4c4)) || + ((code >= 0x4c7) && (code <= 0x4c8)) || + ((code >= 0x4cb) && (code <= 0x4cc)) || + ((code >= 0x4d0) && (code <= 0x4f5)) || + ((code >= 0x4f8) && (code <= 0x4f9)) || + ((code >= 0x531) && (code <= 
0x556)) || + (code == 0x559) || + ((code >= 0x561) && (code <= 0x587)) || + ((code >= 0x5d0) && (code <= 0x5ea)) || + ((code >= 0x5f0) && (code <= 0x5f2)) || + ((code >= 0x621) && (code <= 0x63a)) || + ((code >= 0x640) && (code <= 0x64a)) || + ((code >= 0x671) && (code <= 0x6d3)) || + (code == 0x6d5) || + ((code >= 0x6e5) && (code <= 0x6e6)) || + ((code >= 0x6fa) && (code <= 0x6fc)) || + (code == 0x710) || + ((code >= 0x712) && (code <= 0x72c)) || + ((code >= 0x780) && (code <= 0x7a5)) || + ((code >= 0x905) && (code <= 0x939)) || + (code == 0x93d) || + (code == 0x950) || + ((code >= 0x958) && (code <= 0x961)) || + ((code >= 0x985) && (code <= 0x98c)) || + ((code >= 0x98f) && (code <= 0x990)) || + ((code >= 0x993) && (code <= 0x9a8)) || + ((code >= 0x9aa) && (code <= 0x9b0)) || + (code == 0x9b2) || + ((code >= 0x9b6) && (code <= 0x9b9)) || + ((code >= 0x9dc) && (code <= 0x9dd)) || + ((code >= 0x9df) && (code <= 0x9e1)) || + ((code >= 0x9f0) && (code <= 0x9f1)) || + ((code >= 0xa05) && (code <= 0xa0a)) || + ((code >= 0xa0f) && (code <= 0xa10)) || + ((code >= 0xa13) && (code <= 0xa28)) || + ((code >= 0xa2a) && (code <= 0xa30)) || + ((code >= 0xa32) && (code <= 0xa33)) || + ((code >= 0xa35) && (code <= 0xa36)) || + ((code >= 0xa38) && (code <= 0xa39)) || + ((code >= 0xa59) && (code <= 0xa5c)) || + (code == 0xa5e) || + ((code >= 0xa72) && (code <= 0xa74)) || + ((code >= 0xa85) && (code <= 0xa8b)) || + (code == 0xa8d) || + ((code >= 0xa8f) && (code <= 0xa91)) || + ((code >= 0xa93) && (code <= 0xaa8)) || + ((code >= 0xaaa) && (code <= 0xab0)) || + ((code >= 0xab2) && (code <= 0xab3)) || + ((code >= 0xab5) && (code <= 0xab9)) || + (code == 0xabd) || + (code == 0xad0) || + (code == 0xae0) || + ((code >= 0xb05) && (code <= 0xb0c)) || + ((code >= 0xb0f) && (code <= 0xb10)) || + ((code >= 0xb13) && (code <= 0xb28)) || + ((code >= 0xb2a) && (code <= 0xb30)) || + ((code >= 0xb32) && (code <= 0xb33)) || + ((code >= 0xb36) && (code <= 0xb39)) || + (code == 0xb3d) || + ((code >= 
0xb5c) && (code <= 0xb5d)) || + ((code >= 0xb5f) && (code <= 0xb61)) || + ((code >= 0xb85) && (code <= 0xb8a)) || + ((code >= 0xb8e) && (code <= 0xb90)) || + ((code >= 0xb92) && (code <= 0xb95)) || + ((code >= 0xb99) && (code <= 0xb9a)) || + (code == 0xb9c) || + ((code >= 0xb9e) && (code <= 0xb9f)) || + ((code >= 0xba3) && (code <= 0xba4)) || + ((code >= 0xba8) && (code <= 0xbaa)) || + ((code >= 0xbae) && (code <= 0xbb5)) || + ((code >= 0xbb7) && (code <= 0xbb9)) || + ((code >= 0xc05) && (code <= 0xc0c)) || + ((code >= 0xc0e) && (code <= 0xc10)) || + ((code >= 0xc12) && (code <= 0xc28)) || + ((code >= 0xc2a) && (code <= 0xc33)) || + ((code >= 0xc35) && (code <= 0xc39)) || + ((code >= 0xc60) && (code <= 0xc61)) || + ((code >= 0xc85) && (code <= 0xc8c)) || + ((code >= 0xc8e) && (code <= 0xc90)) || + ((code >= 0xc92) && (code <= 0xca8)) || + ((code >= 0xcaa) && (code <= 0xcb3)) || + ((code >= 0xcb5) && (code <= 0xcb9)) || + (code == 0xcde) || + ((code >= 0xce0) && (code <= 0xce1)) || + ((code >= 0xd05) && (code <= 0xd0c)) || + ((code >= 0xd0e) && (code <= 0xd10)) || + ((code >= 0xd12) && (code <= 0xd28)) || + ((code >= 0xd2a) && (code <= 0xd39)) || + ((code >= 0xd60) && (code <= 0xd61)) || + ((code >= 0xd85) && (code <= 0xd96)) || + ((code >= 0xd9a) && (code <= 0xdb1)) || + ((code >= 0xdb3) && (code <= 0xdbb)) || + (code == 0xdbd) || + ((code >= 0xdc0) && (code <= 0xdc6)) || + ((code >= 0xe01) && (code <= 0xe30)) || + ((code >= 0xe32) && (code <= 0xe33)) || + ((code >= 0xe40) && (code <= 0xe46)) || + ((code >= 0xe81) && (code <= 0xe82)) || + (code == 0xe84) || + ((code >= 0xe87) && (code <= 0xe88)) || + (code == 0xe8a) || + (code == 0xe8d) || + ((code >= 0xe94) && (code <= 0xe97)) || + ((code >= 0xe99) && (code <= 0xe9f)) || + ((code >= 0xea1) && (code <= 0xea3)) || + (code == 0xea5) || + (code == 0xea7) || + ((code >= 0xeaa) && (code <= 0xeab)) || + ((code >= 0xead) && (code <= 0xeb0)) || + ((code >= 0xeb2) && (code <= 0xeb3)) || + (code == 0xebd) || + ((code >= 
0xec0) && (code <= 0xec4)) || + (code == 0xec6) || + ((code >= 0xedc) && (code <= 0xedd)) || + (code == 0xf00) || + ((code >= 0xf40) && (code <= 0xf47)) || + ((code >= 0xf49) && (code <= 0xf6a)) || + ((code >= 0xf88) && (code <= 0xf8b)) || + ((code >= 0x1000) && (code <= 0x1021)) || + ((code >= 0x1023) && (code <= 0x1027)) || + ((code >= 0x1029) && (code <= 0x102a)) || + ((code >= 0x1050) && (code <= 0x1055)) || + ((code >= 0x10a0) && (code <= 0x10c5)) || + ((code >= 0x10d0) && (code <= 0x10f6)) || + ((code >= 0x1100) && (code <= 0x1159)) || + ((code >= 0x115f) && (code <= 0x11a2)) || + ((code >= 0x11a8) && (code <= 0x11f9)) || + ((code >= 0x1200) && (code <= 0x1206)) || + ((code >= 0x1208) && (code <= 0x1246)) || + (code == 0x1248) || + ((code >= 0x124a) && (code <= 0x124d)) || + ((code >= 0x1250) && (code <= 0x1256)) || + (code == 0x1258) || + ((code >= 0x125a) && (code <= 0x125d)) || + ((code >= 0x1260) && (code <= 0x1286)) || + (code == 0x1288) || + ((code >= 0x128a) && (code <= 0x128d)) || + ((code >= 0x1290) && (code <= 0x12ae)) || + (code == 0x12b0) || + ((code >= 0x12b2) && (code <= 0x12b5)) || + ((code >= 0x12b8) && (code <= 0x12be)) || + (code == 0x12c0) || + ((code >= 0x12c2) && (code <= 0x12c5)) || + ((code >= 0x12c8) && (code <= 0x12ce)) || + ((code >= 0x12d0) && (code <= 0x12d6)) || + ((code >= 0x12d8) && (code <= 0x12ee)) || + ((code >= 0x12f0) && (code <= 0x130e)) || + (code == 0x1310) || + ((code >= 0x1312) && (code <= 0x1315)) || + ((code >= 0x1318) && (code <= 0x131e)) || + ((code >= 0x1320) && (code <= 0x1346)) || + ((code >= 0x1348) && (code <= 0x135a)) || + ((code >= 0x13a0) && (code <= 0x13f4)) || + ((code >= 0x1401) && (code <= 0x166c)) || + ((code >= 0x166f) && (code <= 0x1676)) || + ((code >= 0x1681) && (code <= 0x169a)) || + ((code >= 0x16a0) && (code <= 0x16ea)) || + ((code >= 0x1780) && (code <= 0x17b3)) || + ((code >= 0x1820) && (code <= 0x1877)) || + ((code >= 0x1880) && (code <= 0x18a8)) || + ((code >= 0x1e00) && (code <= 0x1e9b)) || 
+ ((code >= 0x1ea0) && (code <= 0x1ef9)) || + ((code >= 0x1f00) && (code <= 0x1f15)) || + ((code >= 0x1f18) && (code <= 0x1f1d)) || + ((code >= 0x1f20) && (code <= 0x1f45)) || + ((code >= 0x1f48) && (code <= 0x1f4d)) || + ((code >= 0x1f50) && (code <= 0x1f57)) || + (code == 0x1f59) || + (code == 0x1f5b) || + (code == 0x1f5d) || + ((code >= 0x1f5f) && (code <= 0x1f7d)) || + ((code >= 0x1f80) && (code <= 0x1fb4)) || + ((code >= 0x1fb6) && (code <= 0x1fbc)) || + (code == 0x1fbe) || + ((code >= 0x1fc2) && (code <= 0x1fc4)) || + ((code >= 0x1fc6) && (code <= 0x1fcc)) || + ((code >= 0x1fd0) && (code <= 0x1fd3)) || + ((code >= 0x1fd6) && (code <= 0x1fdb)) || + ((code >= 0x1fe0) && (code <= 0x1fec)) || + ((code >= 0x1ff2) && (code <= 0x1ff4)) || + ((code >= 0x1ff6) && (code <= 0x1ffc)) || + (code == 0x207f) || + (code == 0x2102) || + (code == 0x2107) || + ((code >= 0x210a) && (code <= 0x2113)) || + (code == 0x2115) || + ((code >= 0x2119) && (code <= 0x211d)) || + (code == 0x2124) || + (code == 0x2126) || + (code == 0x2128) || + ((code >= 0x212a) && (code <= 0x212d)) || + ((code >= 0x212f) && (code <= 0x2131)) || + ((code >= 0x2133) && (code <= 0x2139)) || + ((code >= 0x3005) && (code <= 0x3006)) || + ((code >= 0x3031) && (code <= 0x3035)) || + ((code >= 0x3041) && (code <= 0x3094)) || + ((code >= 0x309d) && (code <= 0x309e)) || + ((code >= 0x30a1) && (code <= 0x30fa)) || + ((code >= 0x30fc) && (code <= 0x30fe)) || + ((code >= 0x3105) && (code <= 0x312c)) || + ((code >= 0x3131) && (code <= 0x318e)) || + ((code >= 0x31a0) && (code <= 0x31b7)) || + ((code >= 0x3400) && /* CJK Ideograph Extension A: whole First..Last span, not just the endpoints */ + (code <= 0x4db5)) || + ((code >= 0x4e00) && /* CJK Ideograph: whole First..Last span */ + (code <= 0x9fa5)) || + ((code >= 0xa000) && (code <= 0xa48c)) || + ((code >= 0xac00) && /* Hangul Syllable: whole First..Last span */ + (code <= 0xd7a3)) || + ((code >= 0xf900) && (code <= 0xfa2d)) || + ((code >= 0xfb00) && (code <= 0xfb06)) || + ((code >= 0xfb13) && (code <= 0xfb17)) || + (code == 0xfb1d) || + ((code >= 0xfb1f) && (code <= 0xfb28)) || + ((code >= 0xfb2a) && (code <= 0xfb36)) || + ((code >= 0xfb38)
&& (code <= 0xfb3c)) || + (code == 0xfb3e) || + ((code >= 0xfb40) && (code <= 0xfb41)) || + ((code >= 0xfb43) && (code <= 0xfb44)) || + ((code >= 0xfb46) && (code <= 0xfbb1)) || + ((code >= 0xfbd3) && (code <= 0xfd3d)) || + ((code >= 0xfd50) && (code <= 0xfd8f)) || + ((code >= 0xfd92) && (code <= 0xfdc7)) || + ((code >= 0xfdf0) && (code <= 0xfdfb)) || + ((code >= 0xfe70) && (code <= 0xfe72)) || + (code == 0xfe74) || + ((code >= 0xfe76) && (code <= 0xfefc)) || + ((code >= 0xff21) && (code <= 0xff3a)) || + ((code >= 0xff41) && (code <= 0xff5a)) || + ((code >= 0xff66) && (code <= 0xffbe)) || + ((code >= 0xffc2) && (code <= 0xffc7)) || + ((code >= 0xffca) && (code <= 0xffcf)) || + ((code >= 0xffd2) && (code <= 0xffd7)) || + ((code >= 0xffda) && (code <= 0xffdc)) || + ((code >= 0x10300) && (code <= 0x1031e)) || + ((code >= 0x10330) && (code <= 0x10349)) || + ((code >= 0x10400) && (code <= 0x10425)) || + ((code >= 0x10428) && (code <= 0x1044d)) || + ((code >= 0x1d400) && (code <= 0x1d454)) || + ((code >= 0x1d456) && (code <= 0x1d49c)) || + ((code >= 0x1d49e) && (code <= 0x1d49f)) || + (code == 0x1d4a2) || + ((code >= 0x1d4a5) && (code <= 0x1d4a6)) || + ((code >= 0x1d4a9) && (code <= 0x1d4ac)) || + ((code >= 0x1d4ae) && (code <= 0x1d4b9)) || + (code == 0x1d4bb) || + ((code >= 0x1d4bd) && (code <= 0x1d4c0)) || + ((code >= 0x1d4c2) && (code <= 0x1d4c3)) || + ((code >= 0x1d4c5) && (code <= 0x1d505)) || + ((code >= 0x1d507) && (code <= 0x1d50a)) || + ((code >= 0x1d50d) && (code <= 0x1d514)) || + ((code >= 0x1d516) && (code <= 0x1d51c)) || + ((code >= 0x1d51e) && (code <= 0x1d539)) || + ((code >= 0x1d53b) && (code <= 0x1d53e)) || + ((code >= 0x1d540) && (code <= 0x1d544)) || + (code == 0x1d546) || + ((code >= 0x1d54a) && (code <= 0x1d550)) || + ((code >= 0x1d552) && (code <= 0x1d6a3)) || + ((code >= 0x1d6a8) && (code <= 0x1d6c0)) || + ((code >= 0x1d6c2) && (code <= 0x1d6da)) || + ((code >= 0x1d6dc) && (code <= 0x1d6fa)) || + ((code >= 0x1d6fc) && (code <= 0x1d714)) || + ((code
>= 0x1d716) && (code <= 0x1d734)) || + ((code >= 0x1d736) && (code <= 0x1d74e)) || + ((code >= 0x1d750) && (code <= 0x1d76e)) || + ((code >= 0x1d770) && (code <= 0x1d788)) || + ((code >= 0x1d78a) && (code <= 0x1d7a8)) || + ((code >= 0x1d7aa) && (code <= 0x1d7c2)) || + ((code >= 0x1d7c4) && (code <= 0x1d7c9)) || + ((code >= 0x20000) && /* CJK Ideograph Extension B: whole First..Last span, not just the endpoints */ + (code <= 0x2a6d6)) || + ((code >= 0x2f800) && (code <= 0x2fa1d))); +} + +/** + * xmlUCSIsCatLl: + * @code: UCS code point + * + * Check whether the character is part of Ll UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLl(int code) { + return(((code >= 0x61) && (code <= 0x7a)) || + (code == 0xaa) || + (code == 0xb5) || + (code == 0xba) || + ((code >= 0xdf) && (code <= 0xf6)) || + ((code >= 0xf8) && (code <= 0xff)) || + (code == 0x101) || + (code == 0x103) || + (code == 0x105) || + (code == 0x107) || + (code == 0x109) || + (code == 0x10b) || + (code == 0x10d) || + (code == 0x10f) || + (code == 0x111) || + (code == 0x113) || + (code == 0x115) || + (code == 0x117) || + (code == 0x119) || + (code == 0x11b) || + (code == 0x11d) || + (code == 0x11f) || + (code == 0x121) || + (code == 0x123) || + (code == 0x125) || + (code == 0x127) || + (code == 0x129) || + (code == 0x12b) || + (code == 0x12d) || + (code == 0x12f) || + (code == 0x131) || + (code == 0x133) || + (code == 0x135) || + ((code >= 0x137) && (code <= 0x138)) || + (code == 0x13a) || + (code == 0x13c) || + (code == 0x13e) || + (code == 0x140) || + (code == 0x142) || + (code == 0x144) || + (code == 0x146) || + ((code >= 0x148) && (code <= 0x149)) || + (code == 0x14b) || + (code == 0x14d) || + (code == 0x14f) || + (code == 0x151) || + (code == 0x153) || + (code == 0x155) || + (code == 0x157) || + (code == 0x159) || + (code == 0x15b) || + (code == 0x15d) || + (code == 0x15f) || + (code == 0x161) || + (code == 0x163) || + (code == 0x165) || + (code == 0x167) || + (code == 0x169) || + (code == 0x16b) || + (code == 0x16d) || + (code == 0x16f) || + (code == 0x171)
|| + (code == 0x173) || + (code == 0x175) || + (code == 0x177) || + (code == 0x17a) || + (code == 0x17c) || + ((code >= 0x17e) && (code <= 0x180)) || + (code == 0x183) || + (code == 0x185) || + (code == 0x188) || + ((code >= 0x18c) && (code <= 0x18d)) || + (code == 0x192) || + (code == 0x195) || + ((code >= 0x199) && (code <= 0x19b)) || + (code == 0x19e) || + (code == 0x1a1) || + (code == 0x1a3) || + (code == 0x1a5) || + (code == 0x1a8) || + ((code >= 0x1aa) && (code <= 0x1ab)) || + (code == 0x1ad) || + (code == 0x1b0) || + (code == 0x1b4) || + (code == 0x1b6) || + ((code >= 0x1b9) && (code <= 0x1ba)) || + ((code >= 0x1bd) && (code <= 0x1bf)) || + (code == 0x1c6) || + (code == 0x1c9) || + (code == 0x1cc) || + (code == 0x1ce) || + (code == 0x1d0) || + (code == 0x1d2) || + (code == 0x1d4) || + (code == 0x1d6) || + (code == 0x1d8) || + (code == 0x1da) || + ((code >= 0x1dc) && (code <= 0x1dd)) || + (code == 0x1df) || + (code == 0x1e1) || + (code == 0x1e3) || + (code == 0x1e5) || + (code == 0x1e7) || + (code == 0x1e9) || + (code == 0x1eb) || + (code == 0x1ed) || + ((code >= 0x1ef) && (code <= 0x1f0)) || + (code == 0x1f3) || + (code == 0x1f5) || + (code == 0x1f9) || + (code == 0x1fb) || + (code == 0x1fd) || + (code == 0x1ff) || + (code == 0x201) || + (code == 0x203) || + (code == 0x205) || + (code == 0x207) || + (code == 0x209) || + (code == 0x20b) || + (code == 0x20d) || + (code == 0x20f) || + (code == 0x211) || + (code == 0x213) || + (code == 0x215) || + (code == 0x217) || + (code == 0x219) || + (code == 0x21b) || + (code == 0x21d) || + (code == 0x21f) || + (code == 0x223) || + (code == 0x225) || + (code == 0x227) || + (code == 0x229) || + (code == 0x22b) || + (code == 0x22d) || + (code == 0x22f) || + (code == 0x231) || + (code == 0x233) || + ((code >= 0x250) && (code <= 0x2ad)) || + (code == 0x390) || + ((code >= 0x3ac) && (code <= 0x3ce)) || + ((code >= 0x3d0) && (code <= 0x3d1)) || + ((code >= 0x3d5) && (code <= 0x3d7)) || + (code == 0x3db) || + (code == 0x3dd) || + 
(code == 0x3df) || + (code == 0x3e1) || + (code == 0x3e3) || + (code == 0x3e5) || + (code == 0x3e7) || + (code == 0x3e9) || + (code == 0x3eb) || + (code == 0x3ed) || + ((code >= 0x3ef) && (code <= 0x3f3)) || + (code == 0x3f5) || + ((code >= 0x430) && (code <= 0x45f)) || + (code == 0x461) || + (code == 0x463) || + (code == 0x465) || + (code == 0x467) || + (code == 0x469) || + (code == 0x46b) || + (code == 0x46d) || + (code == 0x46f) || + (code == 0x471) || + (code == 0x473) || + (code == 0x475) || + (code == 0x477) || + (code == 0x479) || + (code == 0x47b) || + (code == 0x47d) || + (code == 0x47f) || + (code == 0x481) || + (code == 0x48d) || + (code == 0x48f) || + (code == 0x491) || + (code == 0x493) || + (code == 0x495) || + (code == 0x497) || + (code == 0x499) || + (code == 0x49b) || + (code == 0x49d) || + (code == 0x49f) || + (code == 0x4a1) || + (code == 0x4a3) || + (code == 0x4a5) || + (code == 0x4a7) || + (code == 0x4a9) || + (code == 0x4ab) || + (code == 0x4ad) || + (code == 0x4af) || + (code == 0x4b1) || + (code == 0x4b3) || + (code == 0x4b5) || + (code == 0x4b7) || + (code == 0x4b9) || + (code == 0x4bb) || + (code == 0x4bd) || + (code == 0x4bf) || + (code == 0x4c2) || + (code == 0x4c4) || + (code == 0x4c8) || + (code == 0x4cc) || + (code == 0x4d1) || + (code == 0x4d3) || + (code == 0x4d5) || + (code == 0x4d7) || + (code == 0x4d9) || + (code == 0x4db) || + (code == 0x4dd) || + (code == 0x4df) || + (code == 0x4e1) || + (code == 0x4e3) || + (code == 0x4e5) || + (code == 0x4e7) || + (code == 0x4e9) || + (code == 0x4eb) || + (code == 0x4ed) || + (code == 0x4ef) || + (code == 0x4f1) || + (code == 0x4f3) || + (code == 0x4f5) || + (code == 0x4f9) || + ((code >= 0x561) && (code <= 0x587)) || + (code == 0x1e01) || + (code == 0x1e03) || + (code == 0x1e05) || + (code == 0x1e07) || + (code == 0x1e09) || + (code == 0x1e0b) || + (code == 0x1e0d) || + (code == 0x1e0f) || + (code == 0x1e11) || + (code == 0x1e13) || + (code == 0x1e15) || + (code == 0x1e17) || + (code == 
0x1e19) || + (code == 0x1e1b) || + (code == 0x1e1d) || + (code == 0x1e1f) || + (code == 0x1e21) || + (code == 0x1e23) || + (code == 0x1e25) || + (code == 0x1e27) || + (code == 0x1e29) || + (code == 0x1e2b) || + (code == 0x1e2d) || + (code == 0x1e2f) || + (code == 0x1e31) || + (code == 0x1e33) || + (code == 0x1e35) || + (code == 0x1e37) || + (code == 0x1e39) || + (code == 0x1e3b) || + (code == 0x1e3d) || + (code == 0x1e3f) || + (code == 0x1e41) || + (code == 0x1e43) || + (code == 0x1e45) || + (code == 0x1e47) || + (code == 0x1e49) || + (code == 0x1e4b) || + (code == 0x1e4d) || + (code == 0x1e4f) || + (code == 0x1e51) || + (code == 0x1e53) || + (code == 0x1e55) || + (code == 0x1e57) || + (code == 0x1e59) || + (code == 0x1e5b) || + (code == 0x1e5d) || + (code == 0x1e5f) || + (code == 0x1e61) || + (code == 0x1e63) || + (code == 0x1e65) || + (code == 0x1e67) || + (code == 0x1e69) || + (code == 0x1e6b) || + (code == 0x1e6d) || + (code == 0x1e6f) || + (code == 0x1e71) || + (code == 0x1e73) || + (code == 0x1e75) || + (code == 0x1e77) || + (code == 0x1e79) || + (code == 0x1e7b) || + (code == 0x1e7d) || + (code == 0x1e7f) || + (code == 0x1e81) || + (code == 0x1e83) || + (code == 0x1e85) || + (code == 0x1e87) || + (code == 0x1e89) || + (code == 0x1e8b) || + (code == 0x1e8d) || + (code == 0x1e8f) || + (code == 0x1e91) || + (code == 0x1e93) || + ((code >= 0x1e95) && (code <= 0x1e9b)) || + (code == 0x1ea1) || + (code == 0x1ea3) || + (code == 0x1ea5) || + (code == 0x1ea7) || + (code == 0x1ea9) || + (code == 0x1eab) || + (code == 0x1ead) || + (code == 0x1eaf) || + (code == 0x1eb1) || + (code == 0x1eb3) || + (code == 0x1eb5) || + (code == 0x1eb7) || + (code == 0x1eb9) || + (code == 0x1ebb) || + (code == 0x1ebd) || + (code == 0x1ebf) || + (code == 0x1ec1) || + (code == 0x1ec3) || + (code == 0x1ec5) || + (code == 0x1ec7) || + (code == 0x1ec9) || + (code == 0x1ecb) || + (code == 0x1ecd) || + (code == 0x1ecf) || + (code == 0x1ed1) || + (code == 0x1ed3) || + (code == 0x1ed5) || + (code 
== 0x1ed7) || + (code == 0x1ed9) || + (code == 0x1edb) || + (code == 0x1edd) || + (code == 0x1edf) || + (code == 0x1ee1) || + (code == 0x1ee3) || + (code == 0x1ee5) || + (code == 0x1ee7) || + (code == 0x1ee9) || + (code == 0x1eeb) || + (code == 0x1eed) || + (code == 0x1eef) || + (code == 0x1ef1) || + (code == 0x1ef3) || + (code == 0x1ef5) || + (code == 0x1ef7) || + (code == 0x1ef9) || + ((code >= 0x1f00) && (code <= 0x1f07)) || + ((code >= 0x1f10) && (code <= 0x1f15)) || + ((code >= 0x1f20) && (code <= 0x1f27)) || + ((code >= 0x1f30) && (code <= 0x1f37)) || + ((code >= 0x1f40) && (code <= 0x1f45)) || + ((code >= 0x1f50) && (code <= 0x1f57)) || + ((code >= 0x1f60) && (code <= 0x1f67)) || + ((code >= 0x1f70) && (code <= 0x1f7d)) || + ((code >= 0x1f80) && (code <= 0x1f87)) || + ((code >= 0x1f90) && (code <= 0x1f97)) || + ((code >= 0x1fa0) && (code <= 0x1fa7)) || + ((code >= 0x1fb0) && (code <= 0x1fb4)) || + ((code >= 0x1fb6) && (code <= 0x1fb7)) || + (code == 0x1fbe) || + ((code >= 0x1fc2) && (code <= 0x1fc4)) || + ((code >= 0x1fc6) && (code <= 0x1fc7)) || + ((code >= 0x1fd0) && (code <= 0x1fd3)) || + ((code >= 0x1fd6) && (code <= 0x1fd7)) || + ((code >= 0x1fe0) && (code <= 0x1fe7)) || + ((code >= 0x1ff2) && (code <= 0x1ff4)) || + ((code >= 0x1ff6) && (code <= 0x1ff7)) || + (code == 0x207f) || + (code == 0x210a) || + ((code >= 0x210e) && (code <= 0x210f)) || + (code == 0x2113) || + (code == 0x212f) || + (code == 0x2134) || + (code == 0x2139) || + ((code >= 0xfb00) && (code <= 0xfb06)) || + ((code >= 0xfb13) && (code <= 0xfb17)) || + ((code >= 0xff41) && (code <= 0xff5a)) || + ((code >= 0x10428) && (code <= 0x1044d)) || + ((code >= 0x1d41a) && (code <= 0x1d433)) || + ((code >= 0x1d44e) && (code <= 0x1d454)) || + ((code >= 0x1d456) && (code <= 0x1d467)) || + ((code >= 0x1d482) && (code <= 0x1d49b)) || + ((code >= 0x1d4b6) && (code <= 0x1d4b9)) || + (code == 0x1d4bb) || + ((code >= 0x1d4bd) && (code <= 0x1d4c0)) || + ((code >= 0x1d4c2) && (code <= 0x1d4c3)) || + ((code 
>= 0x1d4c5) && (code <= 0x1d4cf)) || + ((code >= 0x1d4ea) && (code <= 0x1d503)) || + ((code >= 0x1d51e) && (code <= 0x1d537)) || + ((code >= 0x1d552) && (code <= 0x1d56b)) || + ((code >= 0x1d586) && (code <= 0x1d59f)) || + ((code >= 0x1d5ba) && (code <= 0x1d5d3)) || + ((code >= 0x1d5ee) && (code <= 0x1d607)) || + ((code >= 0x1d622) && (code <= 0x1d63b)) || + ((code >= 0x1d656) && (code <= 0x1d66f)) || + ((code >= 0x1d68a) && (code <= 0x1d6a3)) || + ((code >= 0x1d6c2) && (code <= 0x1d6da)) || + ((code >= 0x1d6dc) && (code <= 0x1d6e1)) || + ((code >= 0x1d6fc) && (code <= 0x1d714)) || + ((code >= 0x1d716) && (code <= 0x1d71b)) || + ((code >= 0x1d736) && (code <= 0x1d74e)) || + ((code >= 0x1d750) && (code <= 0x1d755)) || + ((code >= 0x1d770) && (code <= 0x1d788)) || + ((code >= 0x1d78a) && (code <= 0x1d78f)) || + ((code >= 0x1d7aa) && (code <= 0x1d7c2)) || + ((code >= 0x1d7c4) && (code <= 0x1d7c9))); +} + +/** + * xmlUCSIsCatLm: + * @code: UCS code point + * + * Check whether the character is part of Lm UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLm(int code) { + return(((code >= 0x2b0) && (code <= 0x2b8)) || + ((code >= 0x2bb) && (code <= 0x2c1)) || + ((code >= 0x2d0) && (code <= 0x2d1)) || + ((code >= 0x2e0) && (code <= 0x2e4)) || + (code == 0x2ee) || + (code == 0x37a) || + (code == 0x559) || + (code == 0x640) || + ((code >= 0x6e5) && (code <= 0x6e6)) || + (code == 0xe46) || + (code == 0xec6) || + (code == 0x1843) || + (code == 0x3005) || + ((code >= 0x3031) && (code <= 0x3035)) || + ((code >= 0x309d) && (code <= 0x309e)) || + ((code >= 0x30fc) && (code <= 0x30fe)) || + (code == 0xff70) || + ((code >= 0xff9e) && (code <= 0xff9f))); +} + +/** + * xmlUCSIsCatLo: + * @code: UCS code point + * + * Check whether the character is part of Lo UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLo(int code) { + return((code == 0x1bb) || + ((code >= 0x1c0) && (code <= 0x1c3)) || + ((code >= 0x5d0) && (code <= 0x5ea)) || + 
((code >= 0x5f0) && (code <= 0x5f2)) || + ((code >= 0x621) && (code <= 0x63a)) || + ((code >= 0x641) && (code <= 0x64a)) || + ((code >= 0x671) && (code <= 0x6d3)) || + (code == 0x6d5) || + ((code >= 0x6fa) && (code <= 0x6fc)) || + (code == 0x710) || + ((code >= 0x712) && (code <= 0x72c)) || + ((code >= 0x780) && (code <= 0x7a5)) || + ((code >= 0x905) && (code <= 0x939)) || + (code == 0x93d) || + (code == 0x950) || + ((code >= 0x958) && (code <= 0x961)) || + ((code >= 0x985) && (code <= 0x98c)) || + ((code >= 0x98f) && (code <= 0x990)) || + ((code >= 0x993) && (code <= 0x9a8)) || + ((code >= 0x9aa) && (code <= 0x9b0)) || + (code == 0x9b2) || + ((code >= 0x9b6) && (code <= 0x9b9)) || + ((code >= 0x9dc) && (code <= 0x9dd)) || + ((code >= 0x9df) && (code <= 0x9e1)) || + ((code >= 0x9f0) && (code <= 0x9f1)) || + ((code >= 0xa05) && (code <= 0xa0a)) || + ((code >= 0xa0f) && (code <= 0xa10)) || + ((code >= 0xa13) && (code <= 0xa28)) || + ((code >= 0xa2a) && (code <= 0xa30)) || + ((code >= 0xa32) && (code <= 0xa33)) || + ((code >= 0xa35) && (code <= 0xa36)) || + ((code >= 0xa38) && (code <= 0xa39)) || + ((code >= 0xa59) && (code <= 0xa5c)) || + (code == 0xa5e) || + ((code >= 0xa72) && (code <= 0xa74)) || + ((code >= 0xa85) && (code <= 0xa8b)) || + (code == 0xa8d) || + ((code >= 0xa8f) && (code <= 0xa91)) || + ((code >= 0xa93) && (code <= 0xaa8)) || + ((code >= 0xaaa) && (code <= 0xab0)) || + ((code >= 0xab2) && (code <= 0xab3)) || + ((code >= 0xab5) && (code <= 0xab9)) || + (code == 0xabd) || + (code == 0xad0) || + (code == 0xae0) || + ((code >= 0xb05) && (code <= 0xb0c)) || + ((code >= 0xb0f) && (code <= 0xb10)) || + ((code >= 0xb13) && (code <= 0xb28)) || + ((code >= 0xb2a) && (code <= 0xb30)) || + ((code >= 0xb32) && (code <= 0xb33)) || + ((code >= 0xb36) && (code <= 0xb39)) || + (code == 0xb3d) || + ((code >= 0xb5c) && (code <= 0xb5d)) || + ((code >= 0xb5f) && (code <= 0xb61)) || + ((code >= 0xb85) && (code <= 0xb8a)) || + ((code >= 0xb8e) && (code <= 0xb90)) || + 
((code >= 0xb92) && (code <= 0xb95)) || + ((code >= 0xb99) && (code <= 0xb9a)) || + (code == 0xb9c) || + ((code >= 0xb9e) && (code <= 0xb9f)) || + ((code >= 0xba3) && (code <= 0xba4)) || + ((code >= 0xba8) && (code <= 0xbaa)) || + ((code >= 0xbae) && (code <= 0xbb5)) || + ((code >= 0xbb7) && (code <= 0xbb9)) || + ((code >= 0xc05) && (code <= 0xc0c)) || + ((code >= 0xc0e) && (code <= 0xc10)) || + ((code >= 0xc12) && (code <= 0xc28)) || + ((code >= 0xc2a) && (code <= 0xc33)) || + ((code >= 0xc35) && (code <= 0xc39)) || + ((code >= 0xc60) && (code <= 0xc61)) || + ((code >= 0xc85) && (code <= 0xc8c)) || + ((code >= 0xc8e) && (code <= 0xc90)) || + ((code >= 0xc92) && (code <= 0xca8)) || + ((code >= 0xcaa) && (code <= 0xcb3)) || + ((code >= 0xcb5) && (code <= 0xcb9)) || + (code == 0xcde) || + ((code >= 0xce0) && (code <= 0xce1)) || + ((code >= 0xd05) && (code <= 0xd0c)) || + ((code >= 0xd0e) && (code <= 0xd10)) || + ((code >= 0xd12) && (code <= 0xd28)) || + ((code >= 0xd2a) && (code <= 0xd39)) || + ((code >= 0xd60) && (code <= 0xd61)) || + ((code >= 0xd85) && (code <= 0xd96)) || + ((code >= 0xd9a) && (code <= 0xdb1)) || + ((code >= 0xdb3) && (code <= 0xdbb)) || + (code == 0xdbd) || + ((code >= 0xdc0) && (code <= 0xdc6)) || + ((code >= 0xe01) && (code <= 0xe30)) || + ((code >= 0xe32) && (code <= 0xe33)) || + ((code >= 0xe40) && (code <= 0xe45)) || + ((code >= 0xe81) && (code <= 0xe82)) || + (code == 0xe84) || + ((code >= 0xe87) && (code <= 0xe88)) || + (code == 0xe8a) || + (code == 0xe8d) || + ((code >= 0xe94) && (code <= 0xe97)) || + ((code >= 0xe99) && (code <= 0xe9f)) || + ((code >= 0xea1) && (code <= 0xea3)) || + (code == 0xea5) || + (code == 0xea7) || + ((code >= 0xeaa) && (code <= 0xeab)) || + ((code >= 0xead) && (code <= 0xeb0)) || + ((code >= 0xeb2) && (code <= 0xeb3)) || + (code == 0xebd) || + ((code >= 0xec0) && (code <= 0xec4)) || + ((code >= 0xedc) && (code <= 0xedd)) || + (code == 0xf00) || + ((code >= 0xf40) && (code <= 0xf47)) || + ((code >= 0xf49) && (code 
<= 0xf6a)) || + ((code >= 0xf88) && (code <= 0xf8b)) || + ((code >= 0x1000) && (code <= 0x1021)) || + ((code >= 0x1023) && (code <= 0x1027)) || + ((code >= 0x1029) && (code <= 0x102a)) || + ((code >= 0x1050) && (code <= 0x1055)) || + ((code >= 0x10d0) && (code <= 0x10f6)) || + ((code >= 0x1100) && (code <= 0x1159)) || + ((code >= 0x115f) && (code <= 0x11a2)) || + ((code >= 0x11a8) && (code <= 0x11f9)) || + ((code >= 0x1200) && (code <= 0x1206)) || + ((code >= 0x1208) && (code <= 0x1246)) || + (code == 0x1248) || + ((code >= 0x124a) && (code <= 0x124d)) || + ((code >= 0x1250) && (code <= 0x1256)) || + (code == 0x1258) || + ((code >= 0x125a) && (code <= 0x125d)) || + ((code >= 0x1260) && (code <= 0x1286)) || + (code == 0x1288) || + ((code >= 0x128a) && (code <= 0x128d)) || + ((code >= 0x1290) && (code <= 0x12ae)) || + (code == 0x12b0) || + ((code >= 0x12b2) && (code <= 0x12b5)) || + ((code >= 0x12b8) && (code <= 0x12be)) || + (code == 0x12c0) || + ((code >= 0x12c2) && (code <= 0x12c5)) || + ((code >= 0x12c8) && (code <= 0x12ce)) || + ((code >= 0x12d0) && (code <= 0x12d6)) || + ((code >= 0x12d8) && (code <= 0x12ee)) || + ((code >= 0x12f0) && (code <= 0x130e)) || + (code == 0x1310) || + ((code >= 0x1312) && (code <= 0x1315)) || + ((code >= 0x1318) && (code <= 0x131e)) || + ((code >= 0x1320) && (code <= 0x1346)) || + ((code >= 0x1348) && (code <= 0x135a)) || + ((code >= 0x13a0) && (code <= 0x13f4)) || + ((code >= 0x1401) && (code <= 0x166c)) || + ((code >= 0x166f) && (code <= 0x1676)) || + ((code >= 0x1681) && (code <= 0x169a)) || + ((code >= 0x16a0) && (code <= 0x16ea)) || + ((code >= 0x1780) && (code <= 0x17b3)) || + ((code >= 0x1820) && (code <= 0x1842)) || + ((code >= 0x1844) && (code <= 0x1877)) || + ((code >= 0x1880) && (code <= 0x18a8)) || + ((code >= 0x2135) && (code <= 0x2138)) || + (code == 0x3006) || + ((code >= 0x3041) && (code <= 0x3094)) || + ((code >= 0x30a1) && (code <= 0x30fa)) || + ((code >= 0x3105) && (code <= 0x312c)) || + ((code >= 0x3131) && (code 
<= 0x318e)) || + ((code >= 0x31a0) && (code <= 0x31b7)) || + ((code >= 0x3400) && /* CJK Ideograph Extension A: whole First..Last span, not just the endpoints */ + (code <= 0x4db5)) || + ((code >= 0x4e00) && /* CJK Ideograph: whole First..Last span */ + (code <= 0x9fa5)) || + ((code >= 0xa000) && (code <= 0xa48c)) || + ((code >= 0xac00) && /* Hangul Syllable: whole First..Last span */ + (code <= 0xd7a3)) || + ((code >= 0xf900) && (code <= 0xfa2d)) || + (code == 0xfb1d) || + ((code >= 0xfb1f) && (code <= 0xfb28)) || + ((code >= 0xfb2a) && (code <= 0xfb36)) || + ((code >= 0xfb38) && (code <= 0xfb3c)) || + (code == 0xfb3e) || + ((code >= 0xfb40) && (code <= 0xfb41)) || + ((code >= 0xfb43) && (code <= 0xfb44)) || + ((code >= 0xfb46) && (code <= 0xfbb1)) || + ((code >= 0xfbd3) && (code <= 0xfd3d)) || + ((code >= 0xfd50) && (code <= 0xfd8f)) || + ((code >= 0xfd92) && (code <= 0xfdc7)) || + ((code >= 0xfdf0) && (code <= 0xfdfb)) || + ((code >= 0xfe70) && (code <= 0xfe72)) || + (code == 0xfe74) || + ((code >= 0xfe76) && (code <= 0xfefc)) || + ((code >= 0xff66) && (code <= 0xff6f)) || + ((code >= 0xff71) && (code <= 0xff9d)) || + ((code >= 0xffa0) && (code <= 0xffbe)) || + ((code >= 0xffc2) && (code <= 0xffc7)) || + ((code >= 0xffca) && (code <= 0xffcf)) || + ((code >= 0xffd2) && (code <= 0xffd7)) || + ((code >= 0xffda) && (code <= 0xffdc)) || + ((code >= 0x10300) && (code <= 0x1031e)) || + ((code >= 0x10330) && (code <= 0x10349)) || + ((code >= 0x20000) && /* CJK Ideograph Extension B: whole First..Last span */ + (code <= 0x2a6d6)) || + ((code >= 0x2f800) && (code <= 0x2fa1d))); +} + +/** + * xmlUCSIsCatLt: + * @code: UCS code point + * + * Check whether the character is part of Lt UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatLt(int code) { + return((code == 0x1c5) || + (code == 0x1c8) || + (code == 0x1cb) || + (code == 0x1f2) || + ((code >= 0x1f88) && (code <= 0x1f8f)) || + ((code >= 0x1f98) && (code <= 0x1f9f)) || + ((code >= 0x1fa8) && (code <= 0x1faf)) || + (code == 0x1fbc) || + (code == 0x1fcc) || + (code == 0x1ffc)); +} + +/** + * xmlUCSIsCatLu: + * @code: UCS code point + * + * Check whether the character is part of Lu UCS Category + * + * Returns 1 if true 0
otherwise + */ +int +xmlUCSIsCatLu(int code) { + return(((code >= 0x41) && (code <= 0x5a)) || + ((code >= 0xc0) && (code <= 0xd6)) || + ((code >= 0xd8) && (code <= 0xde)) || + (code == 0x100) || + (code == 0x102) || + (code == 0x104) || + (code == 0x106) || + (code == 0x108) || + (code == 0x10a) || + (code == 0x10c) || + (code == 0x10e) || + (code == 0x110) || + (code == 0x112) || + (code == 0x114) || + (code == 0x116) || + (code == 0x118) || + (code == 0x11a) || + (code == 0x11c) || + (code == 0x11e) || + (code == 0x120) || + (code == 0x122) || + (code == 0x124) || + (code == 0x126) || + (code == 0x128) || + (code == 0x12a) || + (code == 0x12c) || + (code == 0x12e) || + (code == 0x130) || + (code == 0x132) || + (code == 0x134) || + (code == 0x136) || + (code == 0x139) || + (code == 0x13b) || + (code == 0x13d) || + (code == 0x13f) || + (code == 0x141) || + (code == 0x143) || + (code == 0x145) || + (code == 0x147) || + (code == 0x14a) || + (code == 0x14c) || + (code == 0x14e) || + (code == 0x150) || + (code == 0x152) || + (code == 0x154) || + (code == 0x156) || + (code == 0x158) || + (code == 0x15a) || + (code == 0x15c) || + (code == 0x15e) || + (code == 0x160) || + (code == 0x162) || + (code == 0x164) || + (code == 0x166) || + (code == 0x168) || + (code == 0x16a) || + (code == 0x16c) || + (code == 0x16e) || + (code == 0x170) || + (code == 0x172) || + (code == 0x174) || + (code == 0x176) || + ((code >= 0x178) && (code <= 0x179)) || + (code == 0x17b) || + (code == 0x17d) || + ((code >= 0x181) && (code <= 0x182)) || + (code == 0x184) || + ((code >= 0x186) && (code <= 0x187)) || + ((code >= 0x189) && (code <= 0x18b)) || + ((code >= 0x18e) && (code <= 0x191)) || + ((code >= 0x193) && (code <= 0x194)) || + ((code >= 0x196) && (code <= 0x198)) || + ((code >= 0x19c) && (code <= 0x19d)) || + ((code >= 0x19f) && (code <= 0x1a0)) || + (code == 0x1a2) || + (code == 0x1a4) || + ((code >= 0x1a6) && (code <= 0x1a7)) || + (code == 0x1a9) || + (code == 0x1ac) || + ((code >= 0x1ae) 
&& (code <= 0x1af)) || + ((code >= 0x1b1) && (code <= 0x1b3)) || + (code == 0x1b5) || + ((code >= 0x1b7) && (code <= 0x1b8)) || + (code == 0x1bc) || + (code == 0x1c4) || + (code == 0x1c7) || + (code == 0x1ca) || + (code == 0x1cd) || + (code == 0x1cf) || + (code == 0x1d1) || + (code == 0x1d3) || + (code == 0x1d5) || + (code == 0x1d7) || + (code == 0x1d9) || + (code == 0x1db) || + (code == 0x1de) || + (code == 0x1e0) || + (code == 0x1e2) || + (code == 0x1e4) || + (code == 0x1e6) || + (code == 0x1e8) || + (code == 0x1ea) || + (code == 0x1ec) || + (code == 0x1ee) || + (code == 0x1f1) || + (code == 0x1f4) || + ((code >= 0x1f6) && (code <= 0x1f8)) || + (code == 0x1fa) || + (code == 0x1fc) || + (code == 0x1fe) || + (code == 0x200) || + (code == 0x202) || + (code == 0x204) || + (code == 0x206) || + (code == 0x208) || + (code == 0x20a) || + (code == 0x20c) || + (code == 0x20e) || + (code == 0x210) || + (code == 0x212) || + (code == 0x214) || + (code == 0x216) || + (code == 0x218) || + (code == 0x21a) || + (code == 0x21c) || + (code == 0x21e) || + (code == 0x222) || + (code == 0x224) || + (code == 0x226) || + (code == 0x228) || + (code == 0x22a) || + (code == 0x22c) || + (code == 0x22e) || + (code == 0x230) || + (code == 0x232) || + (code == 0x386) || + ((code >= 0x388) && (code <= 0x38a)) || + (code == 0x38c) || + ((code >= 0x38e) && (code <= 0x38f)) || + ((code >= 0x391) && (code <= 0x3a1)) || + ((code >= 0x3a3) && (code <= 0x3ab)) || + ((code >= 0x3d2) && (code <= 0x3d4)) || + (code == 0x3da) || + (code == 0x3dc) || + (code == 0x3de) || + (code == 0x3e0) || + (code == 0x3e2) || + (code == 0x3e4) || + (code == 0x3e6) || + (code == 0x3e8) || + (code == 0x3ea) || + (code == 0x3ec) || + (code == 0x3ee) || + (code == 0x3f4) || + ((code >= 0x400) && (code <= 0x42f)) || + (code == 0x460) || + (code == 0x462) || + (code == 0x464) || + (code == 0x466) || + (code == 0x468) || + (code == 0x46a) || + (code == 0x46c) || + (code == 0x46e) || + (code == 0x470) || + (code == 0x472) || + 
(code == 0x474) || + (code == 0x476) || + (code == 0x478) || + (code == 0x47a) || + (code == 0x47c) || + (code == 0x47e) || + (code == 0x480) || + (code == 0x48c) || + (code == 0x48e) || + (code == 0x490) || + (code == 0x492) || + (code == 0x494) || + (code == 0x496) || + (code == 0x498) || + (code == 0x49a) || + (code == 0x49c) || + (code == 0x49e) || + (code == 0x4a0) || + (code == 0x4a2) || + (code == 0x4a4) || + (code == 0x4a6) || + (code == 0x4a8) || + (code == 0x4aa) || + (code == 0x4ac) || + (code == 0x4ae) || + (code == 0x4b0) || + (code == 0x4b2) || + (code == 0x4b4) || + (code == 0x4b6) || + (code == 0x4b8) || + (code == 0x4ba) || + (code == 0x4bc) || + (code == 0x4be) || + ((code >= 0x4c0) && (code <= 0x4c1)) || + (code == 0x4c3) || + (code == 0x4c7) || + (code == 0x4cb) || + (code == 0x4d0) || + (code == 0x4d2) || + (code == 0x4d4) || + (code == 0x4d6) || + (code == 0x4d8) || + (code == 0x4da) || + (code == 0x4dc) || + (code == 0x4de) || + (code == 0x4e0) || + (code == 0x4e2) || + (code == 0x4e4) || + (code == 0x4e6) || + (code == 0x4e8) || + (code == 0x4ea) || + (code == 0x4ec) || + (code == 0x4ee) || + (code == 0x4f0) || + (code == 0x4f2) || + (code == 0x4f4) || + (code == 0x4f8) || + ((code >= 0x531) && (code <= 0x556)) || + ((code >= 0x10a0) && (code <= 0x10c5)) || + (code == 0x1e00) || + (code == 0x1e02) || + (code == 0x1e04) || + (code == 0x1e06) || + (code == 0x1e08) || + (code == 0x1e0a) || + (code == 0x1e0c) || + (code == 0x1e0e) || + (code == 0x1e10) || + (code == 0x1e12) || + (code == 0x1e14) || + (code == 0x1e16) || + (code == 0x1e18) || + (code == 0x1e1a) || + (code == 0x1e1c) || + (code == 0x1e1e) || + (code == 0x1e20) || + (code == 0x1e22) || + (code == 0x1e24) || + (code == 0x1e26) || + (code == 0x1e28) || + (code == 0x1e2a) || + (code == 0x1e2c) || + (code == 0x1e2e) || + (code == 0x1e30) || + (code == 0x1e32) || + (code == 0x1e34) || + (code == 0x1e36) || + (code == 0x1e38) || + (code == 0x1e3a) || + (code == 0x1e3c) || + (code == 
0x1e3e) || + (code == 0x1e40) || + (code == 0x1e42) || + (code == 0x1e44) || + (code == 0x1e46) || + (code == 0x1e48) || + (code == 0x1e4a) || + (code == 0x1e4c) || + (code == 0x1e4e) || + (code == 0x1e50) || + (code == 0x1e52) || + (code == 0x1e54) || + (code == 0x1e56) || + (code == 0x1e58) || + (code == 0x1e5a) || + (code == 0x1e5c) || + (code == 0x1e5e) || + (code == 0x1e60) || + (code == 0x1e62) || + (code == 0x1e64) || + (code == 0x1e66) || + (code == 0x1e68) || + (code == 0x1e6a) || + (code == 0x1e6c) || + (code == 0x1e6e) || + (code == 0x1e70) || + (code == 0x1e72) || + (code == 0x1e74) || + (code == 0x1e76) || + (code == 0x1e78) || + (code == 0x1e7a) || + (code == 0x1e7c) || + (code == 0x1e7e) || + (code == 0x1e80) || + (code == 0x1e82) || + (code == 0x1e84) || + (code == 0x1e86) || + (code == 0x1e88) || + (code == 0x1e8a) || + (code == 0x1e8c) || + (code == 0x1e8e) || + (code == 0x1e90) || + (code == 0x1e92) || + (code == 0x1e94) || + (code == 0x1ea0) || + (code == 0x1ea2) || + (code == 0x1ea4) || + (code == 0x1ea6) || + (code == 0x1ea8) || + (code == 0x1eaa) || + (code == 0x1eac) || + (code == 0x1eae) || + (code == 0x1eb0) || + (code == 0x1eb2) || + (code == 0x1eb4) || + (code == 0x1eb6) || + (code == 0x1eb8) || + (code == 0x1eba) || + (code == 0x1ebc) || + (code == 0x1ebe) || + (code == 0x1ec0) || + (code == 0x1ec2) || + (code == 0x1ec4) || + (code == 0x1ec6) || + (code == 0x1ec8) || + (code == 0x1eca) || + (code == 0x1ecc) || + (code == 0x1ece) || + (code == 0x1ed0) || + (code == 0x1ed2) || + (code == 0x1ed4) || + (code == 0x1ed6) || + (code == 0x1ed8) || + (code == 0x1eda) || + (code == 0x1edc) || + (code == 0x1ede) || + (code == 0x1ee0) || + (code == 0x1ee2) || + (code == 0x1ee4) || + (code == 0x1ee6) || + (code == 0x1ee8) || + (code == 0x1eea) || + (code == 0x1eec) || + (code == 0x1eee) || + (code == 0x1ef0) || + (code == 0x1ef2) || + (code == 0x1ef4) || + (code == 0x1ef6) || + (code == 0x1ef8) || + ((code >= 0x1f08) && (code <= 0x1f0f)) || + ((code 
>= 0x1f18) && (code <= 0x1f1d)) || + ((code >= 0x1f28) && (code <= 0x1f2f)) || + ((code >= 0x1f38) && (code <= 0x1f3f)) || + ((code >= 0x1f48) && (code <= 0x1f4d)) || + (code == 0x1f59) || + (code == 0x1f5b) || + (code == 0x1f5d) || + (code == 0x1f5f) || + ((code >= 0x1f68) && (code <= 0x1f6f)) || + ((code >= 0x1fb8) && (code <= 0x1fbb)) || + ((code >= 0x1fc8) && (code <= 0x1fcb)) || + ((code >= 0x1fd8) && (code <= 0x1fdb)) || + ((code >= 0x1fe8) && (code <= 0x1fec)) || + ((code >= 0x1ff8) && (code <= 0x1ffb)) || + (code == 0x2102) || + (code == 0x2107) || + ((code >= 0x210b) && (code <= 0x210d)) || + ((code >= 0x2110) && (code <= 0x2112)) || + (code == 0x2115) || + ((code >= 0x2119) && (code <= 0x211d)) || + (code == 0x2124) || + (code == 0x2126) || + (code == 0x2128) || + ((code >= 0x212a) && (code <= 0x212d)) || + ((code >= 0x2130) && (code <= 0x2131)) || + (code == 0x2133) || + ((code >= 0xff21) && (code <= 0xff3a)) || + ((code >= 0x10400) && (code <= 0x10425)) || + ((code >= 0x1d400) && (code <= 0x1d419)) || + ((code >= 0x1d434) && (code <= 0x1d44d)) || + ((code >= 0x1d468) && (code <= 0x1d481)) || + (code == 0x1d49c) || + ((code >= 0x1d49e) && (code <= 0x1d49f)) || + (code == 0x1d4a2) || + ((code >= 0x1d4a5) && (code <= 0x1d4a6)) || + ((code >= 0x1d4a9) && (code <= 0x1d4ac)) || + ((code >= 0x1d4ae) && (code <= 0x1d4b5)) || + ((code >= 0x1d4d0) && (code <= 0x1d4e9)) || + ((code >= 0x1d504) && (code <= 0x1d505)) || + ((code >= 0x1d507) && (code <= 0x1d50a)) || + ((code >= 0x1d50d) && (code <= 0x1d514)) || + ((code >= 0x1d516) && (code <= 0x1d51c)) || + ((code >= 0x1d538) && (code <= 0x1d539)) || + ((code >= 0x1d53b) && (code <= 0x1d53e)) || + ((code >= 0x1d540) && (code <= 0x1d544)) || + (code == 0x1d546) || + ((code >= 0x1d54a) && (code <= 0x1d550)) || + ((code >= 0x1d56c) && (code <= 0x1d585)) || + ((code >= 0x1d5a0) && (code <= 0x1d5b9)) || + ((code >= 0x1d5d4) && (code <= 0x1d5ed)) || + ((code >= 0x1d608) && (code <= 0x1d621)) || + ((code >= 0x1d63c) && 
(code <= 0x1d655)) || + ((code >= 0x1d670) && (code <= 0x1d689)) || + ((code >= 0x1d6a8) && (code <= 0x1d6c0)) || + ((code >= 0x1d6e2) && (code <= 0x1d6fa)) || + ((code >= 0x1d71c) && (code <= 0x1d734)) || + ((code >= 0x1d756) && (code <= 0x1d76e)) || + ((code >= 0x1d790) && (code <= 0x1d7a8))); +} + +/** + * xmlUCSIsCatM: + * @code: UCS code point + * + * Check whether the character is part of M UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatM(int code) { + return(((code >= 0x300) && (code <= 0x34e)) || + ((code >= 0x360) && (code <= 0x362)) || + ((code >= 0x483) && (code <= 0x486)) || + ((code >= 0x488) && (code <= 0x489)) || + ((code >= 0x591) && (code <= 0x5a1)) || + ((code >= 0x5a3) && (code <= 0x5b9)) || + ((code >= 0x5bb) && (code <= 0x5bd)) || + (code == 0x5bf) || + ((code >= 0x5c1) && (code <= 0x5c2)) || + (code == 0x5c4) || + ((code >= 0x64b) && (code <= 0x655)) || + (code == 0x670) || + ((code >= 0x6d6) && (code <= 0x6e4)) || + ((code >= 0x6e7) && (code <= 0x6e8)) || + ((code >= 0x6ea) && (code <= 0x6ed)) || + (code == 0x711) || + ((code >= 0x730) && (code <= 0x74a)) || + ((code >= 0x7a6) && (code <= 0x7b0)) || + ((code >= 0x901) && (code <= 0x903)) || + (code == 0x93c) || + ((code >= 0x93e) && (code <= 0x94d)) || + ((code >= 0x951) && (code <= 0x954)) || + ((code >= 0x962) && (code <= 0x963)) || + ((code >= 0x981) && (code <= 0x983)) || + (code == 0x9bc) || + ((code >= 0x9be) && (code <= 0x9c4)) || + ((code >= 0x9c7) && (code <= 0x9c8)) || + ((code >= 0x9cb) && (code <= 0x9cd)) || + (code == 0x9d7) || + ((code >= 0x9e2) && (code <= 0x9e3)) || + (code == 0xa02) || + (code == 0xa3c) || + ((code >= 0xa3e) && (code <= 0xa42)) || + ((code >= 0xa47) && (code <= 0xa48)) || + ((code >= 0xa4b) && (code <= 0xa4d)) || + ((code >= 0xa70) && (code <= 0xa71)) || + ((code >= 0xa81) && (code <= 0xa83)) || + (code == 0xabc) || + ((code >= 0xabe) && (code <= 0xac5)) || + ((code >= 0xac7) && (code <= 0xac9)) || + ((code >= 0xacb) && (code <= 
0xacd)) || + ((code >= 0xb01) && (code <= 0xb03)) || + (code == 0xb3c) || + ((code >= 0xb3e) && (code <= 0xb43)) || + ((code >= 0xb47) && (code <= 0xb48)) || + ((code >= 0xb4b) && (code <= 0xb4d)) || + ((code >= 0xb56) && (code <= 0xb57)) || + ((code >= 0xb82) && (code <= 0xb83)) || + ((code >= 0xbbe) && (code <= 0xbc2)) || + ((code >= 0xbc6) && (code <= 0xbc8)) || + ((code >= 0xbca) && (code <= 0xbcd)) || + (code == 0xbd7) || + ((code >= 0xc01) && (code <= 0xc03)) || + ((code >= 0xc3e) && (code <= 0xc44)) || + ((code >= 0xc46) && (code <= 0xc48)) || + ((code >= 0xc4a) && (code <= 0xc4d)) || + ((code >= 0xc55) && (code <= 0xc56)) || + ((code >= 0xc82) && (code <= 0xc83)) || + ((code >= 0xcbe) && (code <= 0xcc4)) || + ((code >= 0xcc6) && (code <= 0xcc8)) || + ((code >= 0xcca) && (code <= 0xccd)) || + ((code >= 0xcd5) && (code <= 0xcd6)) || + ((code >= 0xd02) && (code <= 0xd03)) || + ((code >= 0xd3e) && (code <= 0xd43)) || + ((code >= 0xd46) && (code <= 0xd48)) || + ((code >= 0xd4a) && (code <= 0xd4d)) || + (code == 0xd57) || + ((code >= 0xd82) && (code <= 0xd83)) || + (code == 0xdca) || + ((code >= 0xdcf) && (code <= 0xdd4)) || + (code == 0xdd6) || + ((code >= 0xdd8) && (code <= 0xddf)) || + ((code >= 0xdf2) && (code <= 0xdf3)) || + (code == 0xe31) || + ((code >= 0xe34) && (code <= 0xe3a)) || + ((code >= 0xe47) && (code <= 0xe4e)) || + (code == 0xeb1) || + ((code >= 0xeb4) && (code <= 0xeb9)) || + ((code >= 0xebb) && (code <= 0xebc)) || + ((code >= 0xec8) && (code <= 0xecd)) || + ((code >= 0xf18) && (code <= 0xf19)) || + (code == 0xf35) || + (code == 0xf37) || + (code == 0xf39) || + ((code >= 0xf3e) && (code <= 0xf3f)) || + ((code >= 0xf71) && (code <= 0xf84)) || + ((code >= 0xf86) && (code <= 0xf87)) || + ((code >= 0xf90) && (code <= 0xf97)) || + ((code >= 0xf99) && (code <= 0xfbc)) || + (code == 0xfc6) || + ((code >= 0x102c) && (code <= 0x1032)) || + ((code >= 0x1036) && (code <= 0x1039)) || + ((code >= 0x1056) && (code <= 0x1059)) || + ((code >= 0x17b4) && (code 
<= 0x17d3)) || + (code == 0x18a9) || + ((code >= 0x20d0) && (code <= 0x20e3)) || + ((code >= 0x302a) && (code <= 0x302f)) || + ((code >= 0x3099) && (code <= 0x309a)) || + (code == 0xfb1e) || + ((code >= 0xfe20) && (code <= 0xfe23)) || + ((code >= 0x1d165) && (code <= 0x1d169)) || + ((code >= 0x1d16d) && (code <= 0x1d172)) || + ((code >= 0x1d17b) && (code <= 0x1d182)) || + ((code >= 0x1d185) && (code <= 0x1d18b)) || + ((code >= 0x1d1aa) && (code <= 0x1d1ad))); +} + +/** + * xmlUCSIsCatMc: + * @code: UCS code point + * + * Check whether the character is part of Mc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMc(int code) { + return((code == 0x903) || + ((code >= 0x93e) && (code <= 0x940)) || + ((code >= 0x949) && (code <= 0x94c)) || + ((code >= 0x982) && (code <= 0x983)) || + ((code >= 0x9be) && (code <= 0x9c0)) || + ((code >= 0x9c7) && (code <= 0x9c8)) || + ((code >= 0x9cb) && (code <= 0x9cc)) || + (code == 0x9d7) || + ((code >= 0xa3e) && (code <= 0xa40)) || + (code == 0xa83) || + ((code >= 0xabe) && (code <= 0xac0)) || + (code == 0xac9) || + ((code >= 0xacb) && (code <= 0xacc)) || + ((code >= 0xb02) && (code <= 0xb03)) || + (code == 0xb3e) || + (code == 0xb40) || + ((code >= 0xb47) && (code <= 0xb48)) || + ((code >= 0xb4b) && (code <= 0xb4c)) || + (code == 0xb57) || + (code == 0xb83) || + ((code >= 0xbbe) && (code <= 0xbbf)) || + ((code >= 0xbc1) && (code <= 0xbc2)) || + ((code >= 0xbc6) && (code <= 0xbc8)) || + ((code >= 0xbca) && (code <= 0xbcc)) || + (code == 0xbd7) || + ((code >= 0xc01) && (code <= 0xc03)) || + ((code >= 0xc41) && (code <= 0xc44)) || + ((code >= 0xc82) && (code <= 0xc83)) || + (code == 0xcbe) || + ((code >= 0xcc0) && (code <= 0xcc4)) || + ((code >= 0xcc7) && (code <= 0xcc8)) || + ((code >= 0xcca) && (code <= 0xccb)) || + ((code >= 0xcd5) && (code <= 0xcd6)) || + ((code >= 0xd02) && (code <= 0xd03)) || + ((code >= 0xd3e) && (code <= 0xd40)) || + ((code >= 0xd46) && (code <= 0xd48)) || + ((code >= 0xd4a) && (code <= 
0xd4c)) || + (code == 0xd57) || + ((code >= 0xd82) && (code <= 0xd83)) || + ((code >= 0xdcf) && (code <= 0xdd1)) || + ((code >= 0xdd8) && (code <= 0xddf)) || + ((code >= 0xdf2) && (code <= 0xdf3)) || + ((code >= 0xf3e) && (code <= 0xf3f)) || + (code == 0xf7f) || + (code == 0x102c) || + (code == 0x1031) || + (code == 0x1038) || + ((code >= 0x1056) && (code <= 0x1057)) || + ((code >= 0x17b4) && (code <= 0x17b6)) || + ((code >= 0x17be) && (code <= 0x17c5)) || + ((code >= 0x17c7) && (code <= 0x17c8)) || + ((code >= 0x1d165) && (code <= 0x1d166)) || + ((code >= 0x1d16d) && (code <= 0x1d172))); +} + +/** + * xmlUCSIsCatMe: + * @code: UCS code point + * + * Check whether the character is part of Me UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMe(int code) { + return(((code >= 0x488) && (code <= 0x489)) || + ((code >= 0x6dd) && (code <= 0x6de)) || + ((code >= 0x20dd) && (code <= 0x20e0)) || + ((code >= 0x20e2) && (code <= 0x20e3))); +} + +/** + * xmlUCSIsCatMn: + * @code: UCS code point + * + * Check whether the character is part of Mn UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatMn(int code) { + return(((code >= 0x300) && (code <= 0x34e)) || + ((code >= 0x360) && (code <= 0x362)) || + ((code >= 0x483) && (code <= 0x486)) || + ((code >= 0x591) && (code <= 0x5a1)) || + ((code >= 0x5a3) && (code <= 0x5b9)) || + ((code >= 0x5bb) && (code <= 0x5bd)) || + (code == 0x5bf) || + ((code >= 0x5c1) && (code <= 0x5c2)) || + (code == 0x5c4) || + ((code >= 0x64b) && (code <= 0x655)) || + (code == 0x670) || + ((code >= 0x6d6) && (code <= 0x6dc)) || + ((code >= 0x6df) && (code <= 0x6e4)) || + ((code >= 0x6e7) && (code <= 0x6e8)) || + ((code >= 0x6ea) && (code <= 0x6ed)) || + (code == 0x711) || + ((code >= 0x730) && (code <= 0x74a)) || + ((code >= 0x7a6) && (code <= 0x7b0)) || + ((code >= 0x901) && (code <= 0x902)) || + (code == 0x93c) || + ((code >= 0x941) && (code <= 0x948)) || + (code == 0x94d) || + ((code >= 0x951) && (code <= 
0x954)) || + ((code >= 0x962) && (code <= 0x963)) || + (code == 0x981) || + (code == 0x9bc) || + ((code >= 0x9c1) && (code <= 0x9c4)) || + (code == 0x9cd) || + ((code >= 0x9e2) && (code <= 0x9e3)) || + (code == 0xa02) || + (code == 0xa3c) || + ((code >= 0xa41) && (code <= 0xa42)) || + ((code >= 0xa47) && (code <= 0xa48)) || + ((code >= 0xa4b) && (code <= 0xa4d)) || + ((code >= 0xa70) && (code <= 0xa71)) || + ((code >= 0xa81) && (code <= 0xa82)) || + (code == 0xabc) || + ((code >= 0xac1) && (code <= 0xac5)) || + ((code >= 0xac7) && (code <= 0xac8)) || + (code == 0xacd) || + (code == 0xb01) || + (code == 0xb3c) || + (code == 0xb3f) || + ((code >= 0xb41) && (code <= 0xb43)) || + (code == 0xb4d) || + (code == 0xb56) || + (code == 0xb82) || + (code == 0xbc0) || + (code == 0xbcd) || + ((code >= 0xc3e) && (code <= 0xc40)) || + ((code >= 0xc46) && (code <= 0xc48)) || + ((code >= 0xc4a) && (code <= 0xc4d)) || + ((code >= 0xc55) && (code <= 0xc56)) || + (code == 0xcbf) || + (code == 0xcc6) || + ((code >= 0xccc) && (code <= 0xccd)) || + ((code >= 0xd41) && (code <= 0xd43)) || + (code == 0xd4d) || + (code == 0xdca) || + ((code >= 0xdd2) && (code <= 0xdd4)) || + (code == 0xdd6) || + (code == 0xe31) || + ((code >= 0xe34) && (code <= 0xe3a)) || + ((code >= 0xe47) && (code <= 0xe4e)) || + (code == 0xeb1) || + ((code >= 0xeb4) && (code <= 0xeb9)) || + ((code >= 0xebb) && (code <= 0xebc)) || + ((code >= 0xec8) && (code <= 0xecd)) || + ((code >= 0xf18) && (code <= 0xf19)) || + (code == 0xf35) || + (code == 0xf37) || + (code == 0xf39) || + ((code >= 0xf71) && (code <= 0xf7e)) || + ((code >= 0xf80) && (code <= 0xf84)) || + ((code >= 0xf86) && (code <= 0xf87)) || + ((code >= 0xf90) && (code <= 0xf97)) || + ((code >= 0xf99) && (code <= 0xfbc)) || + (code == 0xfc6) || + ((code >= 0x102d) && (code <= 0x1030)) || + (code == 0x1032) || + ((code >= 0x1036) && (code <= 0x1037)) || + (code == 0x1039) || + ((code >= 0x1058) && (code <= 0x1059)) || + ((code >= 0x17b7) && (code <= 0x17bd)) || + 
(code == 0x17c6) || + ((code >= 0x17c9) && (code <= 0x17d3)) || + (code == 0x18a9) || + ((code >= 0x20d0) && (code <= 0x20dc)) || + (code == 0x20e1) || + ((code >= 0x302a) && (code <= 0x302f)) || + ((code >= 0x3099) && (code <= 0x309a)) || + (code == 0xfb1e) || + ((code >= 0xfe20) && (code <= 0xfe23)) || + ((code >= 0x1d167) && (code <= 0x1d169)) || + ((code >= 0x1d17b) && (code <= 0x1d182)) || + ((code >= 0x1d185) && (code <= 0x1d18b)) || + ((code >= 0x1d1aa) && (code <= 0x1d1ad))); +} + +/** + * xmlUCSIsCatN: + * @code: UCS code point + * + * Check whether the character is part of N UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatN(int code) { + return(((code >= 0x30) && (code <= 0x39)) || + ((code >= 0xb2) && (code <= 0xb3)) || + (code == 0xb9) || + ((code >= 0xbc) && (code <= 0xbe)) || + ((code >= 0x660) && (code <= 0x669)) || + ((code >= 0x6f0) && (code <= 0x6f9)) || + ((code >= 0x966) && (code <= 0x96f)) || + ((code >= 0x9e6) && (code <= 0x9ef)) || + ((code >= 0x9f4) && (code <= 0x9f9)) || + ((code >= 0xa66) && (code <= 0xa6f)) || + ((code >= 0xae6) && (code <= 0xaef)) || + ((code >= 0xb66) && (code <= 0xb6f)) || + ((code >= 0xbe7) && (code <= 0xbf2)) || + ((code >= 0xc66) && (code <= 0xc6f)) || + ((code >= 0xce6) && (code <= 0xcef)) || + ((code >= 0xd66) && (code <= 0xd6f)) || + ((code >= 0xe50) && (code <= 0xe59)) || + ((code >= 0xed0) && (code <= 0xed9)) || + ((code >= 0xf20) && (code <= 0xf33)) || + ((code >= 0x1040) && (code <= 0x1049)) || + ((code >= 0x1369) && (code <= 0x137c)) || + ((code >= 0x16ee) && (code <= 0x16f0)) || + ((code >= 0x17e0) && (code <= 0x17e9)) || + ((code >= 0x1810) && (code <= 0x1819)) || + (code == 0x2070) || + ((code >= 0x2074) && (code <= 0x2079)) || + ((code >= 0x2080) && (code <= 0x2089)) || + ((code >= 0x2153) && (code <= 0x2183)) || + ((code >= 0x2460) && (code <= 0x249b)) || + (code == 0x24ea) || + ((code >= 0x2776) && (code <= 0x2793)) || + (code == 0x3007) || + ((code >= 0x3021) && (code <= 
0x3029)) || + ((code >= 0x3038) && (code <= 0x303a)) || + ((code >= 0x3192) && (code <= 0x3195)) || + ((code >= 0x3220) && (code <= 0x3229)) || + ((code >= 0x3280) && (code <= 0x3289)) || + ((code >= 0xff10) && (code <= 0xff19)) || + ((code >= 0x10320) && (code <= 0x10323)) || + (code == 0x1034a) || + ((code >= 0x1d7ce) && (code <= 0x1d7ff))); +} + +/** + * xmlUCSIsCatNd: + * @code: UCS code point + * + * Check whether the character is part of Nd UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatNd(int code) { + return(((code >= 0x30) && (code <= 0x39)) || + ((code >= 0x660) && (code <= 0x669)) || + ((code >= 0x6f0) && (code <= 0x6f9)) || + ((code >= 0x966) && (code <= 0x96f)) || + ((code >= 0x9e6) && (code <= 0x9ef)) || + ((code >= 0xa66) && (code <= 0xa6f)) || + ((code >= 0xae6) && (code <= 0xaef)) || + ((code >= 0xb66) && (code <= 0xb6f)) || + ((code >= 0xbe7) && (code <= 0xbef)) || + ((code >= 0xc66) && (code <= 0xc6f)) || + ((code >= 0xce6) && (code <= 0xcef)) || + ((code >= 0xd66) && (code <= 0xd6f)) || + ((code >= 0xe50) && (code <= 0xe59)) || + ((code >= 0xed0) && (code <= 0xed9)) || + ((code >= 0xf20) && (code <= 0xf29)) || + ((code >= 0x1040) && (code <= 0x1049)) || + ((code >= 0x1369) && (code <= 0x1371)) || + ((code >= 0x17e0) && (code <= 0x17e9)) || + ((code >= 0x1810) && (code <= 0x1819)) || + ((code >= 0xff10) && (code <= 0xff19)) || + ((code >= 0x1d7ce) && (code <= 0x1d7ff))); +} + +/** + * xmlUCSIsCatNl: + * @code: UCS code point + * + * Check whether the character is part of Nl UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatNl(int code) { + return(((code >= 0x16ee) && (code <= 0x16f0)) || + ((code >= 0x2160) && (code <= 0x2183)) || + (code == 0x3007) || + ((code >= 0x3021) && (code <= 0x3029)) || + ((code >= 0x3038) && (code <= 0x303a)) || + (code == 0x1034a)); +} + +/** + * xmlUCSIsCatNo: + * @code: UCS code point + * + * Check whether the character is part of No UCS Category + * + * Returns 1 if 
true 0 otherwise + */ +int +xmlUCSIsCatNo(int code) { + return(((code >= 0xb2) && (code <= 0xb3)) || + (code == 0xb9) || + ((code >= 0xbc) && (code <= 0xbe)) || + ((code >= 0x9f4) && (code <= 0x9f9)) || + ((code >= 0xbf0) && (code <= 0xbf2)) || + ((code >= 0xf2a) && (code <= 0xf33)) || + ((code >= 0x1372) && (code <= 0x137c)) || + (code == 0x2070) || + ((code >= 0x2074) && (code <= 0x2079)) || + ((code >= 0x2080) && (code <= 0x2089)) || + ((code >= 0x2153) && (code <= 0x215f)) || + ((code >= 0x2460) && (code <= 0x249b)) || + (code == 0x24ea) || + ((code >= 0x2776) && (code <= 0x2793)) || + ((code >= 0x3192) && (code <= 0x3195)) || + ((code >= 0x3220) && (code <= 0x3229)) || + ((code >= 0x3280) && (code <= 0x3289)) || + ((code >= 0x10320) && (code <= 0x10323))); +} + +/** + * xmlUCSIsCatP: + * @code: UCS code point + * + * Check whether the character is part of P UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatP(int code) { + return(((code >= 0x21) && (code <= 0x23)) || + ((code >= 0x25) && (code <= 0x2a)) || + ((code >= 0x2c) && (code <= 0x2f)) || + ((code >= 0x3a) && (code <= 0x3b)) || + ((code >= 0x3f) && (code <= 0x40)) || + ((code >= 0x5b) && (code <= 0x5d)) || + (code == 0x5f) || + (code == 0x7b) || + (code == 0x7d) || + (code == 0xa1) || + (code == 0xab) || + (code == 0xad) || + (code == 0xb7) || + (code == 0xbb) || + (code == 0xbf) || + (code == 0x37e) || + (code == 0x387) || + ((code >= 0x55a) && (code <= 0x55f)) || + ((code >= 0x589) && (code <= 0x58a)) || + (code == 0x5be) || + (code == 0x5c0) || + (code == 0x5c3) || + ((code >= 0x5f3) && (code <= 0x5f4)) || + (code == 0x60c) || + (code == 0x61b) || + (code == 0x61f) || + ((code >= 0x66a) && (code <= 0x66d)) || + (code == 0x6d4) || + ((code >= 0x700) && (code <= 0x70d)) || + ((code >= 0x964) && (code <= 0x965)) || + (code == 0x970) || + (code == 0xdf4) || + (code == 0xe4f) || + ((code >= 0xe5a) && (code <= 0xe5b)) || + ((code >= 0xf04) && (code <= 0xf12)) || + ((code >= 0xf3a) && 
(code <= 0xf3d)) || + (code == 0xf85) || + ((code >= 0x104a) && (code <= 0x104f)) || + (code == 0x10fb) || + ((code >= 0x1361) && (code <= 0x1368)) || + ((code >= 0x166d) && (code <= 0x166e)) || + ((code >= 0x169b) && (code <= 0x169c)) || + ((code >= 0x16eb) && (code <= 0x16ed)) || + ((code >= 0x17d4) && (code <= 0x17da)) || + (code == 0x17dc) || + ((code >= 0x1800) && (code <= 0x180a)) || + ((code >= 0x2010) && (code <= 0x2027)) || + ((code >= 0x2030) && (code <= 0x2043)) || + ((code >= 0x2045) && (code <= 0x2046)) || + ((code >= 0x2048) && (code <= 0x204d)) || + ((code >= 0x207d) && (code <= 0x207e)) || + ((code >= 0x208d) && (code <= 0x208e)) || + ((code >= 0x2329) && (code <= 0x232a)) || + ((code >= 0x3001) && (code <= 0x3003)) || + ((code >= 0x3008) && (code <= 0x3011)) || + ((code >= 0x3014) && (code <= 0x301f)) || + (code == 0x3030) || + (code == 0x30fb) || + ((code >= 0xfd3e) && (code <= 0xfd3f)) || + ((code >= 0xfe30) && (code <= 0xfe44)) || + ((code >= 0xfe49) && (code <= 0xfe52)) || + ((code >= 0xfe54) && (code <= 0xfe61)) || + (code == 0xfe63) || + (code == 0xfe68) || + ((code >= 0xfe6a) && (code <= 0xfe6b)) || + ((code >= 0xff01) && (code <= 0xff03)) || + ((code >= 0xff05) && (code <= 0xff0a)) || + ((code >= 0xff0c) && (code <= 0xff0f)) || + ((code >= 0xff1a) && (code <= 0xff1b)) || + ((code >= 0xff1f) && (code <= 0xff20)) || + ((code >= 0xff3b) && (code <= 0xff3d)) || + (code == 0xff3f) || + (code == 0xff5b) || + (code == 0xff5d) || + ((code >= 0xff61) && (code <= 0xff65))); +} + +/** + * xmlUCSIsCatPc: + * @code: UCS code point + * + * Check whether the character is part of Pc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPc(int code) { + return((code == 0x5f) || + ((code >= 0x203f) && (code <= 0x2040)) || + (code == 0x30fb) || + ((code >= 0xfe33) && (code <= 0xfe34)) || + ((code >= 0xfe4d) && (code <= 0xfe4f)) || + (code == 0xff3f) || + (code == 0xff65)); +} + +/** + * xmlUCSIsCatPd: + * @code: UCS code point + * + * Check 
whether the character is part of Pd UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPd(int code) { + return((code == 0x2d) || + (code == 0xad) || + (code == 0x58a) || + (code == 0x1806) || + ((code >= 0x2010) && (code <= 0x2015)) || + (code == 0x301c) || + (code == 0x3030) || + ((code >= 0xfe31) && (code <= 0xfe32)) || + (code == 0xfe58) || + (code == 0xfe63) || + (code == 0xff0d)); +} + +/** + * xmlUCSIsCatPe: + * @code: UCS code point + * + * Check whether the character is part of Pe UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPe(int code) { + return((code == 0x29) || + (code == 0x5d) || + (code == 0x7d) || + (code == 0xf3b) || + (code == 0xf3d) || + (code == 0x169c) || + (code == 0x2046) || + (code == 0x207e) || + (code == 0x208e) || + (code == 0x232a) || + (code == 0x3009) || + (code == 0x300b) || + (code == 0x300d) || + (code == 0x300f) || + (code == 0x3011) || + (code == 0x3015) || + (code == 0x3017) || + (code == 0x3019) || + (code == 0x301b) || + ((code >= 0x301e) && (code <= 0x301f)) || + (code == 0xfd3f) || + (code == 0xfe36) || + (code == 0xfe38) || + (code == 0xfe3a) || + (code == 0xfe3c) || + (code == 0xfe3e) || + (code == 0xfe40) || + (code == 0xfe42) || + (code == 0xfe44) || + (code == 0xfe5a) || + (code == 0xfe5c) || + (code == 0xfe5e) || + (code == 0xff09) || + (code == 0xff3d) || + (code == 0xff5d) || + (code == 0xff63)); +} + +/** + * xmlUCSIsCatPf: + * @code: UCS code point + * + * Check whether the character is part of Pf UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPf(int code) { + return((code == 0xbb) || + (code == 0x2019) || + (code == 0x201d) || + (code == 0x203a)); +} + +/** + * xmlUCSIsCatPi: + * @code: UCS code point + * + * Check whether the character is part of Pi UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPi(int code) { + return((code == 0xab) || + (code == 0x2018) || + ((code >= 0x201b) && (code <= 0x201c)) || + (code == 
0x201f) || + (code == 0x2039)); +} + +/** + * xmlUCSIsCatPo: + * @code: UCS code point + * + * Check whether the character is part of Po UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPo(int code) { + return(((code >= 0x21) && (code <= 0x23)) || + ((code >= 0x25) && (code <= 0x27)) || + (code == 0x2a) || + (code == 0x2c) || + ((code >= 0x2e) && (code <= 0x2f)) || + ((code >= 0x3a) && (code <= 0x3b)) || + ((code >= 0x3f) && (code <= 0x40)) || + (code == 0x5c) || + (code == 0xa1) || + (code == 0xb7) || + (code == 0xbf) || + (code == 0x37e) || + (code == 0x387) || + ((code >= 0x55a) && (code <= 0x55f)) || + (code == 0x589) || + (code == 0x5be) || + (code == 0x5c0) || + (code == 0x5c3) || + ((code >= 0x5f3) && (code <= 0x5f4)) || + (code == 0x60c) || + (code == 0x61b) || + (code == 0x61f) || + ((code >= 0x66a) && (code <= 0x66d)) || + (code == 0x6d4) || + ((code >= 0x700) && (code <= 0x70d)) || + ((code >= 0x964) && (code <= 0x965)) || + (code == 0x970) || + (code == 0xdf4) || + (code == 0xe4f) || + ((code >= 0xe5a) && (code <= 0xe5b)) || + ((code >= 0xf04) && (code <= 0xf12)) || + (code == 0xf85) || + ((code >= 0x104a) && (code <= 0x104f)) || + (code == 0x10fb) || + ((code >= 0x1361) && (code <= 0x1368)) || + ((code >= 0x166d) && (code <= 0x166e)) || + ((code >= 0x16eb) && (code <= 0x16ed)) || + ((code >= 0x17d4) && (code <= 0x17da)) || + (code == 0x17dc) || + ((code >= 0x1800) && (code <= 0x1805)) || + ((code >= 0x1807) && (code <= 0x180a)) || + ((code >= 0x2016) && (code <= 0x2017)) || + ((code >= 0x2020) && (code <= 0x2027)) || + ((code >= 0x2030) && (code <= 0x2038)) || + ((code >= 0x203b) && (code <= 0x203e)) || + ((code >= 0x2041) && (code <= 0x2043)) || + ((code >= 0x2048) && (code <= 0x204d)) || + ((code >= 0x3001) && (code <= 0x3003)) || + (code == 0xfe30) || + ((code >= 0xfe49) && (code <= 0xfe4c)) || + ((code >= 0xfe50) && (code <= 0xfe52)) || + ((code >= 0xfe54) && (code <= 0xfe57)) || + ((code >= 0xfe5f) && (code <= 0xfe61)) || + 
(code == 0xfe68) || + ((code >= 0xfe6a) && (code <= 0xfe6b)) || + ((code >= 0xff01) && (code <= 0xff03)) || + ((code >= 0xff05) && (code <= 0xff07)) || + (code == 0xff0a) || + (code == 0xff0c) || + ((code >= 0xff0e) && (code <= 0xff0f)) || + ((code >= 0xff1a) && (code <= 0xff1b)) || + ((code >= 0xff1f) && (code <= 0xff20)) || + (code == 0xff3c) || + (code == 0xff61) || + (code == 0xff64)); +} + +/** + * xmlUCSIsCatPs: + * @code: UCS code point + * + * Check whether the character is part of Ps UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatPs(int code) { + return((code == 0x28) || + (code == 0x5b) || + (code == 0x7b) || + (code == 0xf3a) || + (code == 0xf3c) || + (code == 0x169b) || + (code == 0x201a) || + (code == 0x201e) || + (code == 0x2045) || + (code == 0x207d) || + (code == 0x208d) || + (code == 0x2329) || + (code == 0x3008) || + (code == 0x300a) || + (code == 0x300c) || + (code == 0x300e) || + (code == 0x3010) || + (code == 0x3014) || + (code == 0x3016) || + (code == 0x3018) || + (code == 0x301a) || + (code == 0x301d) || + (code == 0xfd3e) || + (code == 0xfe35) || + (code == 0xfe37) || + (code == 0xfe39) || + (code == 0xfe3b) || + (code == 0xfe3d) || + (code == 0xfe3f) || + (code == 0xfe41) || + (code == 0xfe43) || + (code == 0xfe59) || + (code == 0xfe5b) || + (code == 0xfe5d) || + (code == 0xff08) || + (code == 0xff3b) || + (code == 0xff5b) || + (code == 0xff62)); +} + +/** + * xmlUCSIsCatS: + * @code: UCS code point + * + * Check whether the character is part of S UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatS(int code) { + return((code == 0x24) || + (code == 0x2b) || + ((code >= 0x3c) && (code <= 0x3e)) || + (code == 0x5e) || + (code == 0x60) || + (code == 0x7c) || + (code == 0x7e) || + ((code >= 0xa2) && (code <= 0xa9)) || + (code == 0xac) || + ((code >= 0xae) && (code <= 0xb1)) || + (code == 0xb4) || + (code == 0xb6) || + (code == 0xb8) || + (code == 0xd7) || + (code == 0xf7) || + ((code >= 0x2b9) && 
(code <= 0x2ba)) || + ((code >= 0x2c2) && (code <= 0x2cf)) || + ((code >= 0x2d2) && (code <= 0x2df)) || + ((code >= 0x2e5) && (code <= 0x2ed)) || + ((code >= 0x374) && (code <= 0x375)) || + ((code >= 0x384) && (code <= 0x385)) || + (code == 0x482) || + (code == 0x6e9) || + ((code >= 0x6fd) && (code <= 0x6fe)) || + ((code >= 0x9f2) && (code <= 0x9f3)) || + (code == 0x9fa) || + (code == 0xb70) || + (code == 0xe3f) || + ((code >= 0xf01) && (code <= 0xf03)) || + ((code >= 0xf13) && (code <= 0xf17)) || + ((code >= 0xf1a) && (code <= 0xf1f)) || + (code == 0xf34) || + (code == 0xf36) || + (code == 0xf38) || + ((code >= 0xfbe) && (code <= 0xfc5)) || + ((code >= 0xfc7) && (code <= 0xfcc)) || + (code == 0xfcf) || + (code == 0x17db) || + (code == 0x1fbd) || + ((code >= 0x1fbf) && (code <= 0x1fc1)) || + ((code >= 0x1fcd) && (code <= 0x1fcf)) || + ((code >= 0x1fdd) && (code <= 0x1fdf)) || + ((code >= 0x1fed) && (code <= 0x1fef)) || + ((code >= 0x1ffd) && (code <= 0x1ffe)) || + (code == 0x2044) || + ((code >= 0x207a) && (code <= 0x207c)) || + ((code >= 0x208a) && (code <= 0x208c)) || + ((code >= 0x20a0) && (code <= 0x20af)) || + ((code >= 0x2100) && (code <= 0x2101)) || + ((code >= 0x2103) && (code <= 0x2106)) || + ((code >= 0x2108) && (code <= 0x2109)) || + (code == 0x2114) || + ((code >= 0x2116) && (code <= 0x2118)) || + ((code >= 0x211e) && (code <= 0x2123)) || + (code == 0x2125) || + (code == 0x2127) || + (code == 0x2129) || + (code == 0x212e) || + (code == 0x2132) || + (code == 0x213a) || + ((code >= 0x2190) && (code <= 0x21f3)) || + ((code >= 0x2200) && (code <= 0x22f1)) || + ((code >= 0x2300) && (code <= 0x2328)) || + ((code >= 0x232b) && (code <= 0x237b)) || + ((code >= 0x237d) && (code <= 0x239a)) || + ((code >= 0x2400) && (code <= 0x2426)) || + ((code >= 0x2440) && (code <= 0x244a)) || + ((code >= 0x249c) && (code <= 0x24e9)) || + ((code >= 0x2500) && (code <= 0x2595)) || + ((code >= 0x25a0) && (code <= 0x25f7)) || + ((code >= 0x2600) && (code <= 0x2613)) || + ((code 
>= 0x2619) && (code <= 0x2671)) || + ((code >= 0x2701) && (code <= 0x2704)) || + ((code >= 0x2706) && (code <= 0x2709)) || + ((code >= 0x270c) && (code <= 0x2727)) || + ((code >= 0x2729) && (code <= 0x274b)) || + (code == 0x274d) || + ((code >= 0x274f) && (code <= 0x2752)) || + (code == 0x2756) || + ((code >= 0x2758) && (code <= 0x275e)) || + ((code >= 0x2761) && (code <= 0x2767)) || + (code == 0x2794) || + ((code >= 0x2798) && (code <= 0x27af)) || + ((code >= 0x27b1) && (code <= 0x27be)) || + ((code >= 0x2800) && (code <= 0x28ff)) || + ((code >= 0x2e80) && (code <= 0x2e99)) || + ((code >= 0x2e9b) && (code <= 0x2ef3)) || + ((code >= 0x2f00) && (code <= 0x2fd5)) || + ((code >= 0x2ff0) && (code <= 0x2ffb)) || + (code == 0x3004) || + ((code >= 0x3012) && (code <= 0x3013)) || + (code == 0x3020) || + ((code >= 0x3036) && (code <= 0x3037)) || + ((code >= 0x303e) && (code <= 0x303f)) || + ((code >= 0x309b) && (code <= 0x309c)) || + ((code >= 0x3190) && (code <= 0x3191)) || + ((code >= 0x3196) && (code <= 0x319f)) || + ((code >= 0x3200) && (code <= 0x321c)) || + ((code >= 0x322a) && (code <= 0x3243)) || + ((code >= 0x3260) && (code <= 0x327b)) || + (code == 0x327f) || + ((code >= 0x328a) && (code <= 0x32b0)) || + ((code >= 0x32c0) && (code <= 0x32cb)) || + ((code >= 0x32d0) && (code <= 0x32fe)) || + ((code >= 0x3300) && (code <= 0x3376)) || + ((code >= 0x337b) && (code <= 0x33dd)) || + ((code >= 0x33e0) && (code <= 0x33fe)) || + ((code >= 0xa490) && (code <= 0xa4a1)) || + ((code >= 0xa4a4) && (code <= 0xa4b3)) || + ((code >= 0xa4b5) && (code <= 0xa4c0)) || + ((code >= 0xa4c2) && (code <= 0xa4c4)) || + (code == 0xa4c6) || + (code == 0xfb29) || + (code == 0xfe62) || + ((code >= 0xfe64) && (code <= 0xfe66)) || + (code == 0xfe69) || + (code == 0xff04) || + (code == 0xff0b) || + ((code >= 0xff1c) && (code <= 0xff1e)) || + (code == 0xff3e) || + (code == 0xff40) || + (code == 0xff5c) || + (code == 0xff5e) || + ((code >= 0xffe0) && (code <= 0xffe6)) || + ((code >= 0xffe8) && (code 
<= 0xffee)) || + ((code >= 0xfffc) && (code <= 0xfffd)) || + ((code >= 0x1d000) && (code <= 0x1d0f5)) || + ((code >= 0x1d100) && (code <= 0x1d126)) || + ((code >= 0x1d12a) && (code <= 0x1d164)) || + ((code >= 0x1d16a) && (code <= 0x1d16c)) || + ((code >= 0x1d183) && (code <= 0x1d184)) || + ((code >= 0x1d18c) && (code <= 0x1d1a9)) || + ((code >= 0x1d1ae) && (code <= 0x1d1dd)) || + (code == 0x1d6c1) || + (code == 0x1d6db) || + (code == 0x1d6fb) || + (code == 0x1d715) || + (code == 0x1d735) || + (code == 0x1d74f) || + (code == 0x1d76f) || + (code == 0x1d789) || + (code == 0x1d7a9) || + (code == 0x1d7c3)); +} + +/** + * xmlUCSIsCatSc: + * @code: UCS code point + * + * Check whether the character is part of Sc UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSc(int code) { + return((code == 0x24) || + ((code >= 0xa2) && (code <= 0xa5)) || + ((code >= 0x9f2) && (code <= 0x9f3)) || + (code == 0xe3f) || + (code == 0x17db) || + ((code >= 0x20a0) && (code <= 0x20af)) || + (code == 0xfe69) || + (code == 0xff04) || + ((code >= 0xffe0) && (code <= 0xffe1)) || + ((code >= 0xffe5) && (code <= 0xffe6))); +} + +/** + * xmlUCSIsCatSk: + * @code: UCS code point + * + * Check whether the character is part of Sk UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSk(int code) { + return((code == 0x5e) || + (code == 0x60) || + (code == 0xa8) || + (code == 0xaf) || + (code == 0xb4) || + (code == 0xb8) || + ((code >= 0x2b9) && (code <= 0x2ba)) || + ((code >= 0x2c2) && (code <= 0x2cf)) || + ((code >= 0x2d2) && (code <= 0x2df)) || + ((code >= 0x2e5) && (code <= 0x2ed)) || + ((code >= 0x374) && (code <= 0x375)) || + ((code >= 0x384) && (code <= 0x385)) || + (code == 0x1fbd) || + ((code >= 0x1fbf) && (code <= 0x1fc1)) || + ((code >= 0x1fcd) && (code <= 0x1fcf)) || + ((code >= 0x1fdd) && (code <= 0x1fdf)) || + ((code >= 0x1fed) && (code <= 0x1fef)) || + ((code >= 0x1ffd) && (code <= 0x1ffe)) || + ((code >= 0x309b) && (code <= 0x309c)) || + (code == 
0xff3e) || + (code == 0xff40) || + (code == 0xffe3)); +} + +/** + * xmlUCSIsCatSm: + * @code: UCS code point + * + * Check whether the character is part of Sm UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSm(int code) { + return((code == 0x2b) || + ((code >= 0x3c) && (code <= 0x3e)) || + (code == 0x7c) || + (code == 0x7e) || + (code == 0xac) || + (code == 0xb1) || + (code == 0xd7) || + (code == 0xf7) || + (code == 0x2044) || + ((code >= 0x207a) && (code <= 0x207c)) || + ((code >= 0x208a) && (code <= 0x208c)) || + ((code >= 0x2190) && (code <= 0x2194)) || + ((code >= 0x219a) && (code <= 0x219b)) || + (code == 0x21a0) || + (code == 0x21a3) || + (code == 0x21a6) || + (code == 0x21ae) || + ((code >= 0x21ce) && (code <= 0x21cf)) || + (code == 0x21d2) || + (code == 0x21d4) || + ((code >= 0x2200) && (code <= 0x22f1)) || + ((code >= 0x2308) && (code <= 0x230b)) || + ((code >= 0x2320) && (code <= 0x2321)) || + (code == 0x25b7) || + (code == 0x25c1) || + (code == 0x266f) || + (code == 0xfb29) || + (code == 0xfe62) || + ((code >= 0xfe64) && (code <= 0xfe66)) || + (code == 0xff0b) || + ((code >= 0xff1c) && (code <= 0xff1e)) || + (code == 0xff5c) || + (code == 0xff5e) || + (code == 0xffe2) || + ((code >= 0xffe9) && (code <= 0xffec)) || + (code == 0x1d6c1) || + (code == 0x1d6db) || + (code == 0x1d6fb) || + (code == 0x1d715) || + (code == 0x1d735) || + (code == 0x1d74f) || + (code == 0x1d76f) || + (code == 0x1d789) || + (code == 0x1d7a9) || + (code == 0x1d7c3)); +} + +/** + * xmlUCSIsCatSo: + * @code: UCS code point + * + * Check whether the character is part of So UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatSo(int code) { + return(((code >= 0xa6) && (code <= 0xa7)) || + (code == 0xa9) || + (code == 0xae) || + (code == 0xb0) || + (code == 0xb6) || + (code == 0x482) || + (code == 0x6e9) || + ((code >= 0x6fd) && (code <= 0x6fe)) || + (code == 0x9fa) || + (code == 0xb70) || + ((code >= 0xf01) && (code <= 0xf03)) || + ((code >= 
0xf13) && (code <= 0xf17)) || + ((code >= 0xf1a) && (code <= 0xf1f)) || + (code == 0xf34) || + (code == 0xf36) || + (code == 0xf38) || + ((code >= 0xfbe) && (code <= 0xfc5)) || + ((code >= 0xfc7) && (code <= 0xfcc)) || + (code == 0xfcf) || + ((code >= 0x2100) && (code <= 0x2101)) || + ((code >= 0x2103) && (code <= 0x2106)) || + ((code >= 0x2108) && (code <= 0x2109)) || + (code == 0x2114) || + ((code >= 0x2116) && (code <= 0x2118)) || + ((code >= 0x211e) && (code <= 0x2123)) || + (code == 0x2125) || + (code == 0x2127) || + (code == 0x2129) || + (code == 0x212e) || + (code == 0x2132) || + (code == 0x213a) || + ((code >= 0x2195) && (code <= 0x2199)) || + ((code >= 0x219c) && (code <= 0x219f)) || + ((code >= 0x21a1) && (code <= 0x21a2)) || + ((code >= 0x21a4) && (code <= 0x21a5)) || + ((code >= 0x21a7) && (code <= 0x21ad)) || + ((code >= 0x21af) && (code <= 0x21cd)) || + ((code >= 0x21d0) && (code <= 0x21d1)) || + (code == 0x21d3) || + ((code >= 0x21d5) && (code <= 0x21f3)) || + ((code >= 0x2300) && (code <= 0x2307)) || + ((code >= 0x230c) && (code <= 0x231f)) || + ((code >= 0x2322) && (code <= 0x2328)) || + ((code >= 0x232b) && (code <= 0x237b)) || + ((code >= 0x237d) && (code <= 0x239a)) || + ((code >= 0x2400) && (code <= 0x2426)) || + ((code >= 0x2440) && (code <= 0x244a)) || + ((code >= 0x249c) && (code <= 0x24e9)) || + ((code >= 0x2500) && (code <= 0x2595)) || + ((code >= 0x25a0) && (code <= 0x25b6)) || + ((code >= 0x25b8) && (code <= 0x25c0)) || + ((code >= 0x25c2) && (code <= 0x25f7)) || + ((code >= 0x2600) && (code <= 0x2613)) || + ((code >= 0x2619) && (code <= 0x266e)) || + ((code >= 0x2670) && (code <= 0x2671)) || + ((code >= 0x2701) && (code <= 0x2704)) || + ((code >= 0x2706) && (code <= 0x2709)) || + ((code >= 0x270c) && (code <= 0x2727)) || + ((code >= 0x2729) && (code <= 0x274b)) || + (code == 0x274d) || + ((code >= 0x274f) && (code <= 0x2752)) || + (code == 0x2756) || + ((code >= 0x2758) && (code <= 0x275e)) || + ((code >= 0x2761) && (code <= 0x2767)) || 
+ (code == 0x2794) || + ((code >= 0x2798) && (code <= 0x27af)) || + ((code >= 0x27b1) && (code <= 0x27be)) || + ((code >= 0x2800) && (code <= 0x28ff)) || + ((code >= 0x2e80) && (code <= 0x2e99)) || + ((code >= 0x2e9b) && (code <= 0x2ef3)) || + ((code >= 0x2f00) && (code <= 0x2fd5)) || + ((code >= 0x2ff0) && (code <= 0x2ffb)) || + (code == 0x3004) || + ((code >= 0x3012) && (code <= 0x3013)) || + (code == 0x3020) || + ((code >= 0x3036) && (code <= 0x3037)) || + ((code >= 0x303e) && (code <= 0x303f)) || + ((code >= 0x3190) && (code <= 0x3191)) || + ((code >= 0x3196) && (code <= 0x319f)) || + ((code >= 0x3200) && (code <= 0x321c)) || + ((code >= 0x322a) && (code <= 0x3243)) || + ((code >= 0x3260) && (code <= 0x327b)) || + (code == 0x327f) || + ((code >= 0x328a) && (code <= 0x32b0)) || + ((code >= 0x32c0) && (code <= 0x32cb)) || + ((code >= 0x32d0) && (code <= 0x32fe)) || + ((code >= 0x3300) && (code <= 0x3376)) || + ((code >= 0x337b) && (code <= 0x33dd)) || + ((code >= 0x33e0) && (code <= 0x33fe)) || + ((code >= 0xa490) && (code <= 0xa4a1)) || + ((code >= 0xa4a4) && (code <= 0xa4b3)) || + ((code >= 0xa4b5) && (code <= 0xa4c0)) || + ((code >= 0xa4c2) && (code <= 0xa4c4)) || + (code == 0xa4c6) || + (code == 0xffe4) || + (code == 0xffe8) || + ((code >= 0xffed) && (code <= 0xffee)) || + ((code >= 0xfffc) && (code <= 0xfffd)) || + ((code >= 0x1d000) && (code <= 0x1d0f5)) || + ((code >= 0x1d100) && (code <= 0x1d126)) || + ((code >= 0x1d12a) && (code <= 0x1d164)) || + ((code >= 0x1d16a) && (code <= 0x1d16c)) || + ((code >= 0x1d183) && (code <= 0x1d184)) || + ((code >= 0x1d18c) && (code <= 0x1d1a9)) || + ((code >= 0x1d1ae) && (code <= 0x1d1dd))); +} + +/** + * xmlUCSIsCatZ: + * @code: UCS code point + * + * Check whether the character is part of Z UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZ(int code) { + return((code == 0x20) || + (code == 0xa0) || + (code == 0x1680) || + ((code >= 0x2000) && (code <= 0x200b)) || + ((code >= 0x2028) && (code <= 
0x2029)) || + (code == 0x202f) || + (code == 0x3000)); +} + +/** + * xmlUCSIsCatZl: + * @code: UCS code point + * + * Check whether the character is part of Zl UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZl(int code) { + return((code == 0x2028)); +} + +/** + * xmlUCSIsCatZp: + * @code: UCS code point + * + * Check whether the character is part of Zp UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZp(int code) { + return((code == 0x2029)); +} + +/** + * xmlUCSIsCatZs: + * @code: UCS code point + * + * Check whether the character is part of Zs UCS Category + * + * Returns 1 if true 0 otherwise + */ +int +xmlUCSIsCatZs(int code) { + return((code == 0x20) || + (code == 0xa0) || + (code == 0x1680) || + ((code >= 0x2000) && (code <= 0x200b)) || + (code == 0x202f) || + (code == 0x3000)); +} + +/** + * xmlUCSIsCat: + * @code: UCS code point + * @cat: UCS Category name + * + * Check whether the caracter is part of the UCS Category + * + * Returns 1 if true, 0 if false and -1 on unknown category + */ +int +xmlUCSIsCat(int code, const char *cat) { + if (!strcmp(cat, "C")) + return(xmlUCSIsCatC(code)); + if (!strcmp(cat, "Cc")) + return(xmlUCSIsCatCc(code)); + if (!strcmp(cat, "Cf")) + return(xmlUCSIsCatCf(code)); + if (!strcmp(cat, "Co")) + return(xmlUCSIsCatCo(code)); + if (!strcmp(cat, "Cs")) + return(xmlUCSIsCatCs(code)); + if (!strcmp(cat, "L")) + return(xmlUCSIsCatL(code)); + if (!strcmp(cat, "Ll")) + return(xmlUCSIsCatLl(code)); + if (!strcmp(cat, "Lm")) + return(xmlUCSIsCatLm(code)); + if (!strcmp(cat, "Lo")) + return(xmlUCSIsCatLo(code)); + if (!strcmp(cat, "Lt")) + return(xmlUCSIsCatLt(code)); + if (!strcmp(cat, "Lu")) + return(xmlUCSIsCatLu(code)); + if (!strcmp(cat, "M")) + return(xmlUCSIsCatM(code)); + if (!strcmp(cat, "Mc")) + return(xmlUCSIsCatMc(code)); + if (!strcmp(cat, "Me")) + return(xmlUCSIsCatMe(code)); + if (!strcmp(cat, "Mn")) + return(xmlUCSIsCatMn(code)); + if (!strcmp(cat, "N")) + 
return(xmlUCSIsCatN(code)); + if (!strcmp(cat, "Nd")) + return(xmlUCSIsCatNd(code)); + if (!strcmp(cat, "Nl")) + return(xmlUCSIsCatNl(code)); + if (!strcmp(cat, "No")) + return(xmlUCSIsCatNo(code)); + if (!strcmp(cat, "P")) + return(xmlUCSIsCatP(code)); + if (!strcmp(cat, "Pc")) + return(xmlUCSIsCatPc(code)); + if (!strcmp(cat, "Pd")) + return(xmlUCSIsCatPd(code)); + if (!strcmp(cat, "Pe")) + return(xmlUCSIsCatPe(code)); + if (!strcmp(cat, "Pf")) + return(xmlUCSIsCatPf(code)); + if (!strcmp(cat, "Pi")) + return(xmlUCSIsCatPi(code)); + if (!strcmp(cat, "Po")) + return(xmlUCSIsCatPo(code)); + if (!strcmp(cat, "Ps")) + return(xmlUCSIsCatPs(code)); + if (!strcmp(cat, "S")) + return(xmlUCSIsCatS(code)); + if (!strcmp(cat, "Sc")) + return(xmlUCSIsCatSc(code)); + if (!strcmp(cat, "Sk")) + return(xmlUCSIsCatSk(code)); + if (!strcmp(cat, "Sm")) + return(xmlUCSIsCatSm(code)); + if (!strcmp(cat, "So")) + return(xmlUCSIsCatSo(code)); + if (!strcmp(cat, "Z")) + return(xmlUCSIsCatZ(code)); + if (!strcmp(cat, "Zl")) + return(xmlUCSIsCatZl(code)); + if (!strcmp(cat, "Zp")) + return(xmlUCSIsCatZp(code)); + if (!strcmp(cat, "Zs")) + return(xmlUCSIsCatZs(code)); + return(-1); +} + + +#endif /* LIBXML_UNICODE_ENABLED */