From b3de70c28294f6c7f7ef8ec0033fb302b0dfbe26 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Tue, 2 Dec 2003 22:32:15 +0000 Subject: [PATCH] adding the pattern node selection code. Inheried in part from libxslt but * pattern.c include/libxml/pattern.h: adding the pattern node selection code. Inheried in part from libxslt but smaller. * Makefile.am configure.in include/libxml/xmlversion.h.in: integrated the pattern module, made it a configure time option * xmllint.c: added --pattern to test when doing --stream Daniel --- ChangeLog | 8 + Makefile.am | 4 +- configure.in | 17 + include/libxml/pattern.h | 47 ++ include/libxml/xmlversion.h.in | 9 + pattern.c | 918 +++++++++++++++++++++++++++++++++ xmllint.c | 61 ++- 7 files changed, 1058 insertions(+), 6 deletions(-) create mode 100644 include/libxml/pattern.h create mode 100644 pattern.c diff --git a/ChangeLog b/ChangeLog index a995fef3..4607de17 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Tue Dec 2 23:29:56 CET 2003 Daniel Veillard + + * pattern.c include/libxml/pattern.h: adding the pattern node + selection code. Inheried in part from libxslt but smaller. 
+ * Makefile.am configure.in include/libxml/xmlversion.h.in: + integrated the pattern module, made it a configure time option + * xmllint.c: added --pattern to test when doing --stream + Tue Dec 2 11:25:25 CET 2003 Daniel Veillard * xmlreader.c: fixed a problem in xmlreader validation when diff --git a/Makefile.am b/Makefile.am index 11745e51..f10ed55a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -27,7 +27,7 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \ catalog.c globals.c threads.c c14n.c \ xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \ triostr.c trio.c xmlreader.c relaxng.c dict.c SAX2.c \ - xmlwriter.c legacy.c chvalid.c + xmlwriter.c legacy.c chvalid.c pattern.c else libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \ parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \ @@ -36,7 +36,7 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \ catalog.c globals.c threads.c c14n.c \ xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \ xmlreader.c relaxng.c dict.c SAX2.c \ - xmlwriter.c legacy.c chvalid.c + xmlwriter.c legacy.c chvalid.c pattern.c endif DEPS = $(top_builddir)/libxml2.la diff --git a/configure.in b/configure.in index 141ee4cb..edfd4189 100644 --- a/configure.in +++ b/configure.in @@ -622,6 +622,23 @@ fi AC_SUBST(WITH_READER) AC_SUBST(READER_TEST) +AC_ARG_WITH(pattern, +[ --with-pattern add the xmlPattern selection interface (on)]) +if test "$with_minimum" = "yes" -a "$with_pattern" = "" +then + with_pattern=no +fi +if test "$with_pattern" = "no" ; then + echo Disabling the xmlPattern parsing interface + WITH_PATTERN=0 + PATTERN_TEST= +else + WITH_PATTERN=1 + PATTERN_TEST=Patterntests +fi +AC_SUBST(WITH_PATTERN) +AC_SUBST(PATTERN_TEST) + AC_ARG_WITH(writer, [ --with-writer add the xmlWriter saving interface (on)]) if test "$with_minimum" = "yes" -a "$with_writer" = "" diff --git a/include/libxml/pattern.h b/include/libxml/pattern.h new file mode 
100644 index 00000000..6aa7de17 --- /dev/null +++ b/include/libxml/pattern.h @@ -0,0 +1,47 @@ +/* + * Summary: pattern expression handling + * Description: allows compiling and testing pattern expressions for nodes + * either in a tree or based on a parser state. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_PATTERN_H__ +#define __XML_PATTERN_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlPattern: + * + * A compiled (XPath based) pattern to select nodes + */ +typedef struct _xmlPattern xmlPattern; +typedef xmlPattern *xmlPatternPtr; + +XMLPUBFUN void XMLCALL + xmlFreePattern (xmlPatternPtr comp); /* free one compiled pattern */ + +XMLPUBFUN void XMLCALL + xmlFreePatternList (xmlPatternPtr comp); /* free a whole list of compiled patterns */ + +XMLPUBFUN xmlPatternPtr XMLCALL + xmlPatterncompile (const xmlChar *pattern, + xmlDict *dictionnary, + int flags); /* compile a pattern string, NULL in case of error */ +XMLPUBFUN int XMLCALL + xmlPatternMatch (xmlPatternPtr comp, + xmlNodePtr node); /* 1 if the node matches, 0 if not, -1 on failure */ + +#ifdef __cplusplus +} +#endif +#endif /* __XML_PATTERN_H__ */ diff --git a/include/libxml/xmlversion.h.in b/include/libxml/xmlversion.h.in index dd78b882..fcef1cc1 100644 --- a/include/libxml/xmlversion.h.in +++ b/include/libxml/xmlversion.h.in @@ -120,6 +120,15 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); #define LIBXML_READER_ENABLED #endif +/** + * LIBXML_PATTERN_ENABLED: + * + * Whether the xmlPattern node selection interface is configured in + */ +#if @WITH_PATTERN@ +#define LIBXML_PATTERN_ENABLED +#endif + /** * LIBXML_WRITER_ENABLED: * diff --git a/pattern.c b/pattern.c new file mode 100644 index 00000000..e0f1566a --- /dev/null +++ b/pattern.c @@ -0,0 +1,918 @@ +/* + * pattern.c: Implementation of selectors for nodes + * + * Reference: + * http://www.w3.org/TR/2001/REC-xmlschema-1-20010502/ + * to some extent + * http://www.w3.org/TR/1999/REC-xml-19991116 + * + * See Copyright for the status of this software. 
+ * + * daniel@veillard.com + */ + +#define IN_LIBXML +#include "libxml.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef LIBXML_PATTERN_ENABLED + +#define ERROR(a, b, c, d) /* error reporting is compiled out: expands to nothing */ +#define ERROR5(a, b, c, d, e) /* same as ERROR, with one extra argument */ + +/* + * Types are private: + */ + +typedef enum { + XML_OP_END=0, /* end of the step list, the node matched */ + XML_OP_ROOT, /* the node must be a document node */ + XML_OP_ELEM, /* element test on name (value) and namespace URI (value2) */ + XML_OP_CHILD, /* the node must have an element child named value */ + XML_OP_ATTR, /* attribute test on name (value) and namespace URI (value2) */ + XML_OP_PARENT, /* move up to the parent node, optionally testing its name */ + XML_OP_ANCESTOR, /* look for a matching element among the ancestors */ + XML_OP_NS, /* element test on the namespace URI only (value) */ + XML_OP_ALL /* any element */ +} xmlPatOp; + + +typedef struct _xmlStepOp xmlStepOp; +typedef xmlStepOp *xmlStepOpPtr; +struct _xmlStepOp { + xmlPatOp op; /* the kind of test done by this step */ + const xmlChar *value; /* name to test, or namespace URI for XML_OP_NS */ + const xmlChar *value2; /* namespace URI to test, if any */ +}; + +struct _xmlPattern { + void *data; /* the associated template */ + struct _xmlPattern *next; /* siblings */ + const xmlChar *pattern; /* the pattern */ + + /* TODO fix the statically allocated size steps[] */ + int nbStep; /* number of entries of steps[] in use */ + int maxStep; /* set to 10 by xmlNewPattern() */ + xmlStepOp steps[10]; /* ops for computation */ +}; + +typedef struct _xmlPatParserContext xmlPatParserContext; +typedef xmlPatParserContext *xmlPatParserContextPtr; +struct _xmlPatParserContext { + const xmlChar *cur; /* the current char being parsed */ + const xmlChar *base; /* the full expression */ + int error; /* error code */ + xmlDictPtr dict; /* the dictionary if any */ + xmlPatternPtr comp; /* the result */ + xmlNodePtr elem; /* the current node if any */ +}; + +/************************************************************************ + * * + * Type functions * + * * + ************************************************************************/ + +/** + * xmlNewPattern: + * + * Create a new pattern: the structure is zeroed, so it holds no step yet, + * and maxStep is set to 10 + * + * Returns the newly allocated xmlPatternPtr or NULL in case of error + */ +static xmlPatternPtr +xmlNewPattern(void) { + xmlPatternPtr cur; + + cur = (xmlPatternPtr) xmlMalloc(sizeof(xmlPattern)); + if (cur == NULL) { + ERROR(NULL, NULL, NULL, + "xmlNewPattern : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlPattern)); + cur->maxStep = 10; /* same value as the declared size of steps[] */ + return(cur); +} + +/** + * xmlFreePattern: + * 
@comp: the compiled pattern to free, may be NULL + * + * Free up the memory allocated by @comp, including the pattern string + * and the value strings held by each of its steps + */ +void +xmlFreePattern(xmlPatternPtr comp) { + xmlStepOpPtr op; + int i; + + if (comp == NULL) + return; + if (comp->pattern != NULL) + xmlFree((xmlChar *)comp->pattern); + for (i = 0;i < comp->nbStep;i++) { + op = &comp->steps[i]; + if (op->value != NULL) + xmlFree((xmlChar *) op->value); + if (op->value2 != NULL) + xmlFree((xmlChar *) op->value2); + } + /* overwrite the structure before releasing it, presumably to make stale pointers easier to spot */ memset(comp, -1, sizeof(xmlPattern)); + xmlFree(comp); +} + +/** + * xmlFreePatternList: + * @comp: the first pattern of a list linked through the next field + * + * Free up the memory allocated by all the elements of @comp + */ +void +xmlFreePatternList(xmlPatternPtr comp) { + xmlPatternPtr cur; + + while (comp != NULL) { + cur = comp; + comp = comp->next; /* read the link before cur is freed */ + xmlFreePattern(cur); + } +} + +/** + * xmlNewPatParserContext: + * @pattern: the pattern string to parse, the pointer is kept, not copied + * @dict: an optional dictionary, stored in the context + * + * Create a new XML pattern parser context + * + * Returns the newly allocated xmlPatParserContextPtr or NULL in case of + * error, which includes a NULL @pattern + */ +static xmlPatParserContextPtr +xmlNewPatParserContext(const xmlChar *pattern, xmlDictPtr dict) { + xmlPatParserContextPtr cur; + + if (pattern == NULL) + return(NULL); + + cur = (xmlPatParserContextPtr) xmlMalloc(sizeof(xmlPatParserContext)); + if (cur == NULL) { + ERROR(NULL, NULL, NULL, + "xmlNewPatParserContext : malloc failed\n"); + return(NULL); + } + memset(cur, 0, sizeof(xmlPatParserContext)); + cur->dict = dict; + cur->cur = pattern; + cur->base = pattern; + return(cur); +} + +/** + * xmlFreePatParserContext: + * @ctxt: a pattern parser context, may be NULL + * + * Free up the memory allocated by @ctxt; the pattern string and the + * compiled pattern it points to are not freed + */ +static void +xmlFreePatParserContext(xmlPatParserContextPtr ctxt) { + if (ctxt == NULL) + return; + memset(ctxt, -1, sizeof(xmlPatParserContext)); + xmlFree(ctxt); +} + +/** + * xmlPatternAdd: + * @comp: the compiled match expression + * @op: an op + * @value: the first value + * @value2: the second value + * + * Add a step to an XSLT Compiled 
Match + * + * Returns -1 in case of failure, 0 otherwise. + */ +static int +xmlPatternAdd(xmlPatParserContextPtr ctxt ATTRIBUTE_UNUSED, + xmlPatternPtr comp, + xmlPatOp op, xmlChar * value, xmlChar * value2) +{ + if (comp->nbStep >= 10) { + ERROR(ctxt, NULL, NULL, + "xmlPatternAdd: overflow\n"); + return (-1); + } + comp->steps[comp->nbStep].op = op; + comp->steps[comp->nbStep].value = value; + comp->steps[comp->nbStep].value2 = value2; + comp->nbStep++; + return (0); +} + +#if 0 +/** + * xsltSwapTopPattern: + * @comp: the compiled match expression + * + * reverse the two top steps. + */ +static void +xsltSwapTopPattern(xmlPatternPtr comp) { + int i; + int j = comp->nbStep - 1; + + if (j > 0) { + register const xmlChar *tmp; + register xmlPatOp op; + i = j - 1; + tmp = comp->steps[i].value; + comp->steps[i].value = comp->steps[j].value; + comp->steps[j].value = tmp; + tmp = comp->steps[i].value2; + comp->steps[i].value2 = comp->steps[j].value2; + comp->steps[j].value2 = tmp; + op = comp->steps[i].op; + comp->steps[i].op = comp->steps[j].op; + comp->steps[j].op = op; + } +} +#endif + +/** + * xmlReversePattern: + * @comp: the compiled match expression + * + * reverse all the stack of expressions + */ +static void +xmlReversePattern(xmlPatternPtr comp) { + int i = 0; + int j = comp->nbStep - 1; + + while (j > i) { + register const xmlChar *tmp; + register xmlPatOp op; + tmp = comp->steps[i].value; + comp->steps[i].value = comp->steps[j].value; + comp->steps[j].value = tmp; + tmp = comp->steps[i].value2; + comp->steps[i].value2 = comp->steps[j].value2; + comp->steps[j].value2 = tmp; + op = comp->steps[i].op; + comp->steps[i].op = comp->steps[j].op; + comp->steps[j].op = op; + j--; + i++; + } + comp->steps[comp->nbStep++].op = XML_OP_END; +} + +/************************************************************************ + * * + * The interpreter for the precompiled patterns * + * * + ************************************************************************/ + +/** + * 
xmlPatMatch: + * @comp: the precompiled pattern + * @node: a node + * + * Test whether the node matches the pattern + * + * The steps are run in the order they are stored in @comp: each step + * either tests the current node or moves to its parent or to one of its + * ancestors, and the first failing test ends the match. + * + * Returns 1 if it matches, 0 if it doesn't and -1 in case of failure + */ +static int +xmlPatMatch(xmlPatternPtr comp, xmlNodePtr node) { + int i; + xmlStepOpPtr step; + + if ((comp == NULL) || (node == NULL)) return(-1); + for (i = 0;i < comp->nbStep;i++) { + step = &comp->steps[i]; + switch (step->op) { + case XML_OP_END: + /* every step before the terminator matched */ return(1); + case XML_OP_ROOT: + if ((node->type == XML_DOCUMENT_NODE) || +#ifdef LIBXML_DOCB_ENABLED + (node->type == XML_DOCB_DOCUMENT_NODE) || +#endif + (node->type == XML_HTML_DOCUMENT_NODE)) + continue; + return(0); + case XML_OP_ELEM: + if (node->type != XML_ELEMENT_NODE) + return(0); + if (step->value == NULL) + continue; + /* cheap first character test before the full comparison */ if (step->value[0] != node->name[0]) + return(0); + if (!xmlStrEqual(step->value, node->name)) + return(0); + + /* Namespace test */ + if (node->ns == NULL) { + if (step->value2 != NULL) + return(0); + } else if (node->ns->href != NULL) { + if (step->value2 == NULL) + return(0); + if (!xmlStrEqual(step->value2, node->ns->href)) + return(0); + } + continue; + case XML_OP_CHILD: { + xmlNodePtr lst; + + if ((node->type != XML_ELEMENT_NODE) && + (node->type != XML_DOCUMENT_NODE) && +#ifdef LIBXML_DOCB_ENABLED + (node->type != XML_DOCB_DOCUMENT_NODE) && +#endif + (node->type != XML_HTML_DOCUMENT_NODE)) + return(0); + + lst = node->children; + + /* succeeds if one element child of node is named step->value */ if (step->value != NULL) { + while (lst != NULL) { + if ((lst->type == XML_ELEMENT_NODE) && + (step->value[0] == lst->name[0]) && + (xmlStrEqual(step->value, lst->name))) + break; + lst = lst->next; + } + if (lst != NULL) + continue; + } + return(0); + } + case XML_OP_ATTR: + if (node->type != XML_ATTRIBUTE_NODE) + return(0); + if (step->value != NULL) { + if (step->value[0] != node->name[0]) + return(0); + if (!xmlStrEqual(step->value, node->name)) + return(0); + } + /* Namespace test */ + if (node->ns == NULL) { + if (step->value2 != NULL) + return(0); + } else 
if (step->value2 != NULL) { + if (!xmlStrEqual(step->value2, node->ns->href)) + return(0); + } + continue; + case XML_OP_PARENT: + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE) || +#ifdef LIBXML_DOCB_ENABLED + (node->type == XML_DOCB_DOCUMENT_NODE) || +#endif + (node->type == XML_NAMESPACE_DECL)) + return(0); + /* the following steps are tested against the parent */ node = node->parent; + if (node == NULL) + return(0); + if (step->value == NULL) + continue; + if (step->value[0] != node->name[0]) + return(0); + if (!xmlStrEqual(step->value, node->name)) + return(0); + /* Namespace test */ + if (node->ns == NULL) { + if (step->value2 != NULL) + return(0); + } else if (node->ns->href != NULL) { + if (step->value2 == NULL) + return(0); + if (!xmlStrEqual(step->value2, node->ns->href)) + return(0); + } + continue; + case XML_OP_ANCESTOR: + /* TODO: implement coalescing of ANCESTOR/NODE ops */ + /* an ancestor step without a name borrows the test of the next step */ if (step->value == NULL) { + i++; + step = &comp->steps[i]; + if (step->op == XML_OP_ROOT) + return(1); + if (step->op != XML_OP_ELEM) + return(0); + if (step->value == NULL) + return(-1); + } + if (node == NULL) + return(0); + if ((node->type == XML_DOCUMENT_NODE) || + (node->type == XML_HTML_DOCUMENT_NODE) || +#ifdef LIBXML_DOCB_ENABLED + (node->type == XML_DOCB_DOCUMENT_NODE) || +#endif + (node->type == XML_NAMESPACE_DECL)) + return(0); + node = node->parent; + /* climb until an element with the expected name and namespace is found */ while (node != NULL) { + if (node == NULL) + return(0); + if ((node->type == XML_ELEMENT_NODE) && + (step->value[0] == node->name[0]) && + (xmlStrEqual(step->value, node->name))) { + /* Namespace test */ + if (node->ns == NULL) { + if (step->value2 == NULL) + break; + } else if (node->ns->href != NULL) { + if ((step->value2 != NULL) && + (xmlStrEqual(step->value2, node->ns->href))) + break; + } + } + node = node->parent; + } + if (node == NULL) + return(0); + continue; + case XML_OP_NS: + if (node->type != XML_ELEMENT_NODE) + return(0); + if (node->ns == NULL) { + if (step->value != NULL) + return(0); + } else if (node->ns->href != NULL) { 
+ if (step->value == NULL) + return(0); + if (!xmlStrEqual(step->value, node->ns->href)) + return(0); + } + /* leaves the switch only, the loop goes on with the next step */ break; + case XML_OP_ALL: + if (node->type != XML_ELEMENT_NODE) + return(0); + break; + } + } + return(1); +} + +/************************************************************************ + * * + * Dedicated parser for templates * + * * + ************************************************************************/ + +#define TODO \ + xmlGenericError(xmlGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); +#define CUR (*ctxt->cur) +#define SKIP(val) ctxt->cur += (val) +#define NXT(val) ctxt->cur[(val)] +#define CUR_PTR ctxt->cur + +#define SKIP_BLANKS \ + while (IS_BLANK(CUR)) NEXT + +#define CURRENT (*ctxt->cur) +#define NEXT ((*ctxt->cur) ? ctxt->cur++: ctxt->cur) + + +#define PUSH(op, val, val2) \ + if (xmlPatternAdd(ctxt, ctxt->comp, (op), (val), (val2))) goto error; + +#define XSLT_ERROR(X) \ + { xsltError(ctxt, __FILE__, __LINE__, X); \ + ctxt->error = (X); return; } + +#define XSLT_ERROR0(X) \ + { xsltError(ctxt, __FILE__, __LINE__, X); \ + ctxt->error = (X); return(0); } + +#if 0 +/** + * xmlPatScanLiteral: + * @ctxt: the XPath Parser context + * + * Parse an XPath Literal: + * + * [29] Literal ::= '"' [^"]* '"' + * | "'" [^']* "'" + * + * Returns the Literal parsed or NULL + */ + +static xmlChar * +xmlPatScanLiteral(xmlPatParserContextPtr ctxt) { + const xmlChar *q, *cur; + xmlChar *ret = NULL; + int val, len; + + SKIP_BLANKS; + if (CUR == '"') { + NEXT; + cur = q = CUR_PTR; + val = xmlStringCurrentChar(NULL, cur, &len); + while ((IS_CHAR(val)) && (val != '"')) { + cur += len; + val = xmlStringCurrentChar(NULL, cur, &len); + } + if (!IS_CHAR(val)) { + ctxt->error = 1; + return(NULL); + } else { + ret = xmlStrndup(q, cur - q); + } + cur += len; + CUR_PTR = cur; + } else if (CUR == '\'') { + NEXT; + cur = q = CUR_PTR; + val = xmlStringCurrentChar(NULL, cur, &len); + while ((IS_CHAR(val)) && (val != '\'')) { + cur += len; + 
val = xmlStringCurrentChar(NULL, cur, &len); + } + if (!IS_CHAR(val)) { + ctxt->error = 1; + return(NULL); + } else { + ret = xmlStrndup(q, cur - q); + } + cur += len; + CUR_PTR = cur; + } else { + /* XP_ERROR(XPATH_START_LITERAL_ERROR); */ + ctxt->error = 1; + return(NULL); + } + return(ret); +} +#endif + +/** + * xmlPatScanName: + * @ctxt: the XPath Parser context + * + * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | + * CombiningChar | Extender + * + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * + * [6] Names ::= Name (S Name)* + * + * Leading blanks are skipped. On success the parsing position is moved + * past the name and the result, built with xmlStrndup(), has to be + * freed by the caller. + * NOTE(review): a leading ':' passes the first test but is not accepted + * by the scanning loop, so such input presumably yields an empty string + * - to be confirmed. + * + * Returns the Name parsed or NULL + */ + +static xmlChar * +xmlPatScanName(xmlPatParserContextPtr ctxt) { + const xmlChar *q, *cur; + xmlChar *ret = NULL; + int val, len; + + SKIP_BLANKS; + + cur = q = CUR_PTR; + val = xmlStringCurrentChar(NULL, cur, &len); + if (!IS_LETTER(val) && (val != '_') && (val != ':')) + return(NULL); + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + cur += len; + val = xmlStringCurrentChar(NULL, cur, &len); + } + ret = xmlStrndup(q, cur - q); + CUR_PTR = cur; + return(ret); +} + +/** + * xmlPatScanNCName: + * @ctxt: the XPath Parser context + * + * Parses a non qualified name: same scanning as xmlPatScanName() except + * that the first character cannot be ':'. The result, built with + * xmlStrndup(), has to be freed by the caller. + * + * Returns the Name parsed or NULL + */ + +static xmlChar * +xmlPatScanNCName(xmlPatParserContextPtr ctxt) { + const xmlChar *q, *cur; + xmlChar *ret = NULL; + int val, len; + + SKIP_BLANKS; + + cur = q = CUR_PTR; + val = xmlStringCurrentChar(NULL, cur, &len); + if (!IS_LETTER(val) && (val != '_')) + return(NULL); + + while ((IS_LETTER(val)) || (IS_DIGIT(val)) || + (val == '.') || (val == '-') || + (val == '_') || + (IS_COMBINING(val)) || + (IS_EXTENDER(val))) { + cur += len; + val = xmlStringCurrentChar(NULL, cur, &len); + } + ret = xmlStrndup(q, cur - q); + CUR_PTR = cur; + return(ret); +} + +#if 0 +/** + * xmlPatScanQName: + * @ctxt: the XPath Parser context + * @prefix: the place to store the prefix + * + * Parse 
a qualified name + * + * Returns the Name parsed or NULL + */ + +static xmlChar * +xmlPatScanQName(xmlPatParserContextPtr ctxt, xmlChar **prefix) { + xmlChar *ret = NULL; + + *prefix = NULL; + ret = xmlPatScanNCName(ctxt); + if (CUR == ':') { + *prefix = ret; + NEXT; + ret = xmlPatScanNCName(ctxt); + } + return(ret); +} +#endif + +/** + * xmlCompileStepPattern: + * @ctxt: the compilation context + * + * Compile the Step Pattern and generates a precompiled + * form suitable for fast matching. + * + * [3] Step ::= '.' | NameTest + * [4] NameTest ::= QName | '*' | NCName ':' '*' + */ + +static void +xmlCompileStepPattern(xmlPatParserContextPtr ctxt) { + xmlChar *token = NULL; + xmlChar *name = NULL; + const xmlChar *URI = NULL; + xmlChar *URL = NULL; + + SKIP_BLANKS; + if (CUR == '.') { + NEXT; + PUSH(XML_OP_ELEM, NULL, NULL); + return; + } + name = xmlPatScanNCName(ctxt); + if (name == NULL) { + if (CUR == '*') { + NEXT; + PUSH(XML_OP_ALL, NULL, NULL); + return; + } else { + ERROR(NULL, NULL, NULL, + "xmlCompileStepPattern : Name expected\n"); + ctxt->error = 1; + return; + } + } + SKIP_BLANKS; + if (CUR == ':') { + NEXT; + if (CUR != ':') { + xmlChar *prefix = name; + xmlNsPtr ns; + + /* + * This is a namespace match + */ + token = xmlPatScanName(ctxt); + ns = xmlSearchNs(NULL, ctxt->elem, prefix); + if (ns == NULL) { + ERROR5(NULL, NULL, NULL, + "xmlCompileStepPattern : no namespace bound to prefix %s\n", + prefix); + ctxt->error = 1; + goto error; + } else { + URL = xmlStrdup(ns->href); + } + xmlFree(prefix); + if (token == NULL) { + if (CUR == '*') { + NEXT; + PUSH(XML_OP_NS, URL, NULL); + } else { + ERROR(NULL, NULL, NULL, + "xmlCompileStepPattern : Name expected\n"); + ctxt->error = 1; + goto error; + } + } else { + PUSH(XML_OP_ELEM, token, URL); + } + } else { + NEXT; + if (xmlStrEqual(token, (const xmlChar *) "child")) { + xmlFree(token); + token = xmlPatScanName(ctxt); + if (token == NULL) { + if (CUR == '*') { + NEXT; + PUSH(XML_OP_ALL, token, NULL); + 
return; + } else { + ERROR(NULL, NULL, NULL, + "xmlCompileStepPattern : QName expected\n"); + ctxt->error = 1; + goto error; + } + } + TODO + /* URI = xsltGetQNameURI(ctxt->elem, &token); */ + if (token == NULL) { + ctxt->error = 1; + goto error; + } else { + name = xmlStrdup(token); + if (URI != NULL) + URL = xmlStrdup(URI); + } + PUSH(XML_OP_CHILD, name, URL); + } else if (xmlStrEqual(token, (const xmlChar *) "attribute")) { + xmlFree(token); + token = xmlPatScanName(ctxt); + if (token == NULL) { + ERROR(NULL, NULL, NULL, + "xmlCompileStepPattern : QName expected\n"); + ctxt->error = 1; + goto error; + } + TODO + /* URI = xsltGetQNameURI(ctxt->elem, &token); */ + if (token == NULL) { + ctxt->error = 1; + goto error; + } else { + name = xmlStrdup(token); + if (URI != NULL) + URL = xmlStrdup(URI); + } + PUSH(XML_OP_ATTR, name, URL); + } else { + ERROR(NULL, NULL, NULL, + "xmlCompileStepPattern : 'child' or 'attribute' expected\n"); + ctxt->error = 1; + goto error; + } + xmlFree(token); + } + } else if (CUR == '*') { + NEXT; + PUSH(XML_OP_ALL, token, NULL); + } else { + if (name == NULL) { + ctxt->error = 1; + goto error; + } + PUSH(XML_OP_ELEM, name, NULL); + } + return; +error: + if (token != NULL) + xmlFree(token); + if (name != NULL) + xmlFree(name); +} + +/** + * xmlCompilePathPattern: + * @ctxt: the compilation context + * + * Compile the Path Pattern and generates a precompiled + * form suitable for fast matching. + * + * [5] Path ::= ('.//')? 
( Step '/' )* ( Step | '@' NameTest ) + */ +static void +xmlCompilePathPattern(xmlPatParserContextPtr ctxt) { + SKIP_BLANKS; + if ((CUR == '/') && (NXT(1) == '/')) { + /* + * since we reverse the query + * a leading // can be safely ignored + */ + NEXT; + NEXT; + } else if ((CUR == '.') && (NXT(1) == '/') && (NXT(2) == '/')) { + /* + * a leading .// can be safely ignored + */ + NEXT; + NEXT; + NEXT; + } + if (CUR == '@') { + TODO + } else { + xmlCompileStepPattern(ctxt); + SKIP_BLANKS; + while (CUR == '/') { + if ((CUR == '/') && (NXT(1) == '/')) { + PUSH(XML_OP_ANCESTOR, NULL, NULL); + NEXT; + NEXT; + SKIP_BLANKS; + xmlCompileStepPattern(ctxt); + } else { + PUSH(XML_OP_PARENT, NULL, NULL); + NEXT; + SKIP_BLANKS; + if ((CUR != 0) || (CUR == '|')) { + xmlCompileStepPattern(ctxt); + } + } + } + } +error: + return; +} +/************************************************************************ + * * + * The public interfaces * + * * + ************************************************************************/ + +/** + * xmlPatterncompile: + * @pattern: the pattern to compile + * @dict: an optional dictionnary for interned strings + * @flags: compilation flags, undefined yet + * + * Compile a pattern + * + * Returns the compiled for of the pattern or NULL in case of error + */ +xmlPatternPtr +xmlPatterncompile(const xmlChar *pattern, xmlDict *dictionnary, int flags) { + xmlPatternPtr ret = NULL; + xmlPatParserContextPtr ctxt = NULL; + + ctxt = xmlNewPatParserContext(pattern, dictionnary); + if (ctxt == NULL) goto error; + ret = xmlNewPattern(); + if (ret == NULL) goto error; + ctxt->comp = ret; + + xmlCompilePathPattern(ctxt); + xmlFreePatParserContext(ctxt); + + xmlReversePattern(ret); + return(ret); +error: + if (ctxt != NULL) xmlFreePatParserContext(ctxt); + if (ret != NULL) xmlFreePattern(ret); + return(NULL); +} + +/** + * xmlPatternMatch: + * @comp: the precompiled pattern + * @node: a node + * + * Test wether the node matches the pattern + * + * Returns 1 if it 
matches, 0 if it doesn't and -1 in case of failure + */ +int +xmlPatternMatch(xmlPatternPtr comp, xmlNodePtr node) +{ + /* reject missing arguments, then defer to the step interpreter */ if ((comp == NULL) || (node == NULL)) + return(-1); + return(xmlPatMatch(comp, node)); +} + +#endif /* LIBXML_PATTERN_ENABLED */ diff --git a/xmllint.c b/xmllint.c index 92b1c3e8..8cc8e581 100644 --- a/xmllint.c +++ b/xmllint.c @@ -89,6 +89,9 @@ #include #include #endif +#ifdef LIBXML_PATTERN_ENABLED +#include +#endif #ifndef XML_XML_DEFAULT_CATALOG #define XML_XML_DEFAULT_CATALOG "file:///etc/xml/catalog" @@ -160,6 +163,10 @@ static int chkregister = 0; #ifdef LIBXML_SAX1_ENABLED static int sax1 = 0; #endif /* LIBXML_SAX1_ENABLED */ +#ifdef LIBXML_PATTERN_ENABLED +static const char *pattern = NULL; +static xmlPatternPtr patternc = NULL; +#endif static int options = 0; /* @@ -620,6 +627,13 @@ static void processNode(xmlTextReaderPtr reader) { else { printf(" %s\n", value); } +#ifdef LIBXML_PATTERN_ENABLED + if (patternc) { + if (xmlPatternMatch(patternc, xmlTextReaderCurrentNode(reader)) == 1) { + printf("Node matches pattern %s\n", pattern); + } + } +#endif } static void streamFile(char *filename) { @@ -680,7 +694,11 @@ static void streamFile(char *filename) { } ret = xmlTextReaderRead(reader); while (ret == 1) { - if (debug) + if ((debug) +#ifdef LIBXML_PATTERN_ENABLED + || (patternc) +#endif + ) processNode(reader); ret = xmlTextReaderRead(reader); } @@ -748,7 +766,11 @@ static void walkDoc(xmlDocPtr doc) { } ret = xmlTextReaderRead(reader); while (ret == 1) { - if (debug) + if ((debug) +#ifdef LIBXML_PATTERN_ENABLED + || (patternc) +#endif + ) processNode(reader); ret = xmlTextReaderRead(reader); } @@ -1451,7 +1473,7 @@ static void usage(const char *name) { #ifdef LIBXML_CATALOG_ENABLED printf("\t--catalogs : use SGML catalogs from $SGML_CATALOG_FILES\n"); printf("\t otherwise XML Catalogs starting from \n"); - printf("\t " XML_XML_DEFAULT_CATALOG " are activated by default\n"); + printf("\t %s are activated by default\n", 
XML_XML_DEFAULT_CATALOG); printf("\t--nocatalogs: deactivate all catalogs\n"); #endif printf("\t--auto : generate a small doc on the fly\n"); @@ -1464,6 +1486,9 @@ static void usage(const char *name) { printf("\t--stream : use the streaming interface to process very large files\n"); printf("\t--walker : create a reader and walk though the resulting doc\n"); #endif /* LIBXML_READER_ENABLED */ +#ifdef LIBXML_PATTERN_ENABLED + printf("\t--pattern pattern_value : test the pattern support\n"); +#endif printf("\t--chkregister : verify the node registration code\n"); #ifdef LIBXML_SCHEMAS_ENABLED printf("\t--relaxng schema : do RelaxNG validation against the schema\n"); @@ -1732,6 +1757,12 @@ main(int argc, char **argv) { } else if ((!strcmp(argv[i], "-nonet")) || (!strcmp(argv[i], "--nonet"))) { options |= XML_PARSE_NONET; +#ifdef LIBXML_PATTERN_ENABLED + } else if ((!strcmp(argv[i], "-pattern")) || + (!strcmp(argv[i], "--pattern"))) { + i++; + pattern = argv[i]; +#endif } else { fprintf(stderr, "Unknown option %s\n", argv[i]); usage(argv[0]); @@ -1847,7 +1878,18 @@ main(int argc, char **argv) { endTimer("Compiling the schemas"); } } -#endif +#endif /* LIBXML_SCHEMAS_ENABLED */ +#ifdef LIBXML_PATTERN_ENABLED + if (pattern != NULL) { + patternc = xmlPatterncompile((const xmlChar *) pattern, NULL, 0); + if (patternc == NULL) { + xmlGenericError(xmlGenericErrorContext, + "Pattern %s failed to compile\n", pattern); + progresult = 7; + pattern = NULL; + } + } +#endif /* LIBXML_PATTERN_ENABLED */ for (i = 1; i < argc ; i++) { if ((!strcmp(argv[i], "-encode")) || (!strcmp(argv[i], "--encode"))) { @@ -1881,6 +1923,13 @@ main(int argc, char **argv) { i++; continue; } +#ifdef LIBXML_PATTERN_ENABLED + if ((!strcmp(argv[i], "-pattern")) || + (!strcmp(argv[i], "--pattern"))) { + i++; + continue; + } +#endif if ((timing) && (repeat)) startTimer(); /* Remember file names. "-" means stdin. 
*/ @@ -1931,6 +1980,10 @@ main(int argc, char **argv) { if (wxschemas != NULL) xmlSchemaFree(wxschemas); xmlRelaxNGCleanupTypes(); +#endif +#ifdef LIBXML_PATTERN_ENABLED + if (patternc != NULL) + xmlFreePattern(patternc); #endif xmlCleanupParser(); xmlMemoryDump();