Added the Expand() and Next() operations to work on subtrees within the reader framework

* xmlreader.c include/libxml/xmlreader.h: Added the Expand()
  and Next() operation to work on subtrees within the reader
  framework.
* doc/libxml2-api.xml python/libxml2class.txt: resulting updates
* python/tests/reader5.py: added an example for those new
  functions of the reader.
Daniel
This commit is contained in:
Daniel Veillard 2003-04-11 09:02:11 +00:00
parent 645c690d49
commit c6cae7b690
6 changed files with 196 additions and 3 deletions

View File

@ -1,3 +1,12 @@
Fri Apr 11 10:59:24 CEST 2003 Daniel Veillard <daniel@veillard.com>
* xmlreader.c include/libxml/xmlreader.h: Added the Expand()
and Next() operation to work on subtrees within the reader
framework.
* doc/libxml2-api.xml python/libxml2class.txt: resulting updates
* python/tests/reader5.py: added an example for those new
functions of the reader.
Thu Apr 10 23:38:13 CEST 2003 Daniel Veillard <daniel@veillard.com>
* HTMLtree.c: patch from Vasily Tchekalkin to fix #109865

View File

@ -883,7 +883,6 @@
<exports symbol='xmlRemoveID'/>
<exports symbol='xmlRemoveRef'/>
<exports symbol='xmlSnprintfElementContent'/>
<exports symbol='xmlSplitQName2'/>
<exports symbol='xmlSprintfElementContent'/>
<exports symbol='xmlValidBuildContentModel'/>
<exports symbol='xmlValidCtxt'/>
@ -929,6 +928,7 @@
<exports symbol='_htmlEntityDesc'/>
<exports symbol='htmlAttrAllowed'/>
<exports symbol='htmlAutoCloseTag'/>
<exports symbol='htmlCreateMemoryParserCtxt'/>
<exports symbol='htmlCreatePushParserCtxt'/>
<exports symbol='htmlDefaultSubelement'/>
<exports symbol='htmlDocPtr'/>
@ -1127,6 +1127,7 @@
<exports symbol='xmlTextReaderCurrentNode'/>
<exports symbol='xmlTextReaderDepth'/>
<exports symbol='xmlTextReaderErrorFunc'/>
<exports symbol='xmlTextReaderExpand'/>
<exports symbol='xmlTextReaderGetAttribute'/>
<exports symbol='xmlTextReaderGetAttributeNo'/>
<exports symbol='xmlTextReaderGetAttributeNs'/>
@ -1150,6 +1151,7 @@
<exports symbol='xmlTextReaderMoveToNextAttribute'/>
<exports symbol='xmlTextReaderName'/>
<exports symbol='xmlTextReaderNamespaceUri'/>
<exports symbol='xmlTextReaderNext'/>
<exports symbol='xmlTextReaderNodeType'/>
<exports symbol='xmlTextReaderNormalization'/>
<exports symbol='xmlTextReaderPrefix'/>
@ -1552,6 +1554,7 @@
<exports symbol='xmlBufferWriteCHAR'/>
<exports symbol='xmlBufferWriteChar'/>
<exports symbol='xmlBufferWriteQuotedString'/>
<exports symbol='xmlBuildQName'/>
<exports symbol='xmlChar'/>
<exports symbol='xmlChildrenNode'/>
<exports symbol='xmlCopyDoc'/>
@ -1701,6 +1704,7 @@
<exports symbol='xmlSetNsProp'/>
<exports symbol='xmlSetProp'/>
<exports symbol='xmlSetTreeDoc'/>
<exports symbol='xmlSplitQName2'/>
<exports symbol='xmlStringGetNodeList'/>
<exports symbol='xmlStringLenGetNodeList'/>
<exports symbol='xmlTextConcat'/>
@ -3633,6 +3637,12 @@ actually an xmlCharEncoding'/>
<arg name='filename' type='const char *' info='the filename'/>
<arg name='encoding' type='const char *' info='a free form C string describing the HTML document encoding, or NULL'/>
</function>
<function name='htmlCreateMemoryParserCtxt' file='HTMLparser'>
<info>Create a parser context for an HTML in-memory document.</info>
<return type='htmlParserCtxtPtr' info='the new parser context or NULL'/>
<arg name='buffer' type='const char *' info='a pointer to a char array'/>
<arg name='size' type='int' info='the size of the array'/>
</function>
<function name='htmlCreatePushParserCtxt' file='HTMLparser'>
<info>Create a parser context for using the HTML parser in push mode The value of @filename is used for fetching external entities and error/warning reports.</info>
<return type='htmlParserCtxtPtr' info='the new parser context or NULL'/>
@ -4560,6 +4570,14 @@ actually an xmlCharEncoding'/>
<arg name='buf' type='xmlBufferPtr' info='the XML buffer output'/>
<arg name='string' type='const xmlChar *' info='the string to add'/>
</function>
<function name='xmlBuildQName' file='tree'>
<info>Builds the QName @prefix:@ncname in @memory if there is enough space and prefix is not NULL nor empty, otherwise allocate a new string. If prefix is NULL or empty it returns ncname.</info>
<return type='xmlChar *' info='the new string which must be freed by the caller if different from @memory and @ncname or NULL in case of error'/>
<arg name='ncname' type='const xmlChar *' info='the Name'/>
<arg name='prefix' type='const xmlChar *' info='the prefix'/>
<arg name='memory' type='xmlChar *' info='preallocated memory'/>
<arg name='len' type='int' info='preallocated memory length'/>
</function>
<function name='xmlBuildURI' file='uri'>
<info>Computes the final URI of the reference done by checking that the given URI is valid, and building the final URI using the base URI. This is processed according to section 5.2 of the RFC 2396 5.2. Resolving Relative References to Absolute Form</info>
<return type='xmlChar *' info='a new URI string (to be freed by the caller) or NULL in case of error.'/>
@ -8076,10 +8094,10 @@ actually an xmlCharEncoding'/>
<arg name='name' type='const xmlChar *' info='an XML parser context'/>
<arg name='prefix' type='xmlChar **' info='a xmlChar **'/>
</function>
<function name='xmlSplitQName2' file='valid'>
<function name='xmlSplitQName2' file='tree'>
<info>parse an XML qualified name string [NS 5] QName ::= (Prefix &apos;:&apos;)? LocalPart [NS 6] Prefix ::= NCName [NS 7] LocalPart ::= NCName</info>
<return type='xmlChar *' info='NULL if not a QName, otherwise the local part, and prefix is updated to get the Prefix if any.'/>
<arg name='name' type='const xmlChar *' info='an XML parser context'/>
<arg name='name' type='const xmlChar *' info='the full QName'/>
<arg name='prefix' type='xmlChar **' info='a xmlChar **'/>
</function>
<function name='xmlSprintfElementContent' file='valid'>
@ -8283,6 +8301,11 @@ actually an xmlCharEncoding'/>
<arg name='severity' type='xmlParserSeverities' info=''/>
<arg name='locator' type='xmlTextReaderLocatorPtr' info=''/>
</functype>
<function name='xmlTextReaderExpand' file='xmlreader'>
<info>Reads the contents of the current node and the full subtree. It then makes the subtree available until the next xmlTextReaderRead() call</info>
<return type='xmlNodePtr' info='a node pointer valid until the next xmlTextReaderRead() call or NULL in case of error.'/>
<arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
</function>
<function name='xmlTextReaderGetAttribute' file='xmlreader'>
<info>Provides the value of the attribute with the specified qualified name.</info>
<return type='xmlChar *' info='a string containing the value of the specified attribute, or NULL in case of error. The string must be deallocated by the caller.'/>
@ -8405,6 +8428,11 @@ actually an xmlCharEncoding'/>
<return type='xmlChar *' info='the namespace URI or NULL if not available'/>
<arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
</function>
<function name='xmlTextReaderNext' file='xmlreader'>
<info>Skip to the node following the current one in document order while avoiding the subtree if any.</info>
<return type='int' info='1 if the node was read successfully, 0 if there is no more nodes to read, or -1 in case of error'/>
<arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
</function>
<function name='xmlTextReaderNodeType' file='xmlreader'>
<info>Get the node type of the current node Reference: http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html</info>
<return type='int' info='the xmlNodeType of the current node or -1 in case of error'/>

View File

@ -106,6 +106,8 @@ int xmlTextReaderGetParserProp (xmlTextReaderPtr reader,
int prop);
xmlNodePtr xmlTextReaderCurrentNode (xmlTextReaderPtr reader);
xmlDocPtr xmlTextReaderCurrentDoc (xmlTextReaderPtr reader);
xmlNodePtr xmlTextReaderExpand (xmlTextReaderPtr reader);
int xmlTextReaderNext (xmlTextReaderPtr reader);
/*
* Error handling extensions

View File

@ -6,6 +6,7 @@
# functions from module HTMLparser
htmlCreateMemoryParserCtxt()
htmlHandleOmittedElem()
htmlIsScriptAttribute()
htmlParseDoc()
@ -132,6 +133,7 @@ relaxNGNewMemParserCtxt()
relaxNGNewParserCtxt()
# functions from module tree
buildQName()
compressMode()
isXHTML()
newComment()
@ -594,6 +596,7 @@ Class xmlTextReader(xmlTextReaderCore)
CurrentDoc()
CurrentNode()
Depth()
Expand()
GetAttribute()
GetAttributeNo()
GetAttributeNs()
@ -613,6 +616,7 @@ Class xmlTextReader(xmlTextReaderCore)
MoveToNextAttribute()
Name()
NamespaceUri()
Next()
NodeType()
Normalization()
Prefix()

48
python/tests/reader5.py Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/python -u
#
# this tests the Expand() API of the xmlTextReader interface
# this extract the Dragon bibliography entries from the XML specification
#
import libxml2
import StringIO
import sys
# Memory debug specific
libxml2.debugMemory(1)
expect="""<bibl id="Aho" key="Aho/Ullman">Aho, Alfred V.,
Ravi Sethi, and Jeffrey D. Ullman.
<emph>Compilers: Principles, Techniques, and Tools</emph>.
Reading: Addison-Wesley, 1986, rpt. corr. 1988.</bibl>"""
f = open('../../test/valid/REC-xml-19980210.xml')
input = libxml2.inputBuffer(f)
reader = input.newTextReader("REC")
res=""
while reader.Read():
while reader.Name() == 'bibl':
node = reader.Expand() # expand the subtree
if node.xpathEval("@id = 'Aho'"): # use XPath on it
res = res + node.serialize()
if reader.Next() != 1: # skip the subtree
break;
if res != expect:
print "Error: didn't get the expected output"
print "got '%s'" % (res)
print "expected '%s'" % (expect)
#
# cleanup
#
del input
del reader
# Memory debug specific
libxml2.cleanupParser()
if libxml2.debugMemory(1) == 0:
print "OK"
else:
print "Memory leak %d bytes" % (libxml2.debugMemory(1))
libxml2.dumpMemory()

View File

@ -554,6 +554,56 @@ xmlTextReaderValidateEntity(xmlTextReaderPtr reader) {
}
/**
 * xmlTextReaderGetSuccessor:
 * @cur:  the current node
 *
 * Get the successor of a node if available. The successor is the next
 * sibling of @cur or, when @cur has none, the next sibling of the closest
 * ancestor which has one. Children of @cur are never considered.
 *
 * Returns the successor node, or NULL if @cur is NULL or if neither @cur
 *         nor any of its ancestors has a next sibling.
 */
static xmlNodePtr
xmlTextReaderGetSuccessor(xmlNodePtr cur) {
    /*
     * Walk from the node up through its parents and stop at the first
     * one which has a following sibling. Running off the top of the
     * tree (parent == NULL) means there is no successor.
     */
    while (cur != NULL) {
        if (cur->next != NULL)
            return(cur->next);
        cur = cur->parent;
    }
    return(NULL);
}
/**
 * xmlTextReaderDoExpand:
 * @reader:  the xmlTextReaderPtr used
 *
 * Makes sure that the current node is fully read as well as all its
 * descendants. Data is pushed through xmlTextReaderPushData() until
 * either a successor of the current node exists in the tree, which is
 * taken as the sign that the subtree is complete, or the reader is in
 * XML_TEXTREADER_MODE_EOF.
 *
 * Returns 1 once a successor exists or the reader is in EOF mode,
 *         -1 if an argument is invalid or pushing data failed.
 */
static int
xmlTextReaderDoExpand(xmlTextReaderPtr reader) {
    if (reader == NULL)
        return(-1);
    if ((reader->node == NULL) || (reader->ctxt == NULL))
        return(-1);

    while (reader->mode != XML_TEXTREADER_MODE_EOF) {
        /* a node following the current one means the subtree is built */
        if (xmlTextReaderGetSuccessor(reader->node) != NULL)
            break;
        /* otherwise feed more input; a negative result is an error */
        if (xmlTextReaderPushData(reader) < 0)
            return(-1);
    }
    return(1);
}
/**
* xmlTextReaderRead:
* @reader: the xmlTextReaderPtr used
@ -804,6 +854,7 @@ node_found:
#endif /* LIBXML_REGEXP_ENABLED */
return(1);
node_end:
reader->mode = XML_TEXTREADER_DONE;
return(0);
}
@ -822,6 +873,57 @@ xmlTextReaderReadState(xmlTextReaderPtr reader) {
return(reader->mode);
}
/**
 * xmlTextReaderExpand:
 * @reader:  the xmlTextReaderPtr used
 *
 * Reads the contents of the current node and the full subtree. It then
 * makes the subtree available until the next xmlTextReaderRead() call.
 *
 * Returns a node pointer valid until the next xmlTextReaderRead() call,
 *         or NULL in case of error (no reader, no current node, no parser
 *         context, or failure while expanding).
 */
xmlNodePtr
xmlTextReaderExpand(xmlTextReaderPtr reader) {
    if (reader == NULL)
        return(NULL);
    if ((reader->node == NULL) || (reader->ctxt == NULL))
        return(NULL);

    /* the expansion helper reports failures with a negative value */
    return((xmlTextReaderDoExpand(reader) < 0) ? NULL : reader->node);
}
/**
 * xmlTextReaderNext:
 * @reader:  the xmlTextReaderPtr used
 *
 * Skip to the node following the current one in document order while
 * avoiding the subtree if any.
 *
 * A plain xmlTextReaderRead() is done when there is no current node,
 * when the current node is not an element, when the reader state is
 * XML_TEXTREADER_END, or when the node carries the xmlTextReaderIsEmpty
 * marker in its _private field (presumably flagging an empty element).
 * Otherwise the reader is advanced until it reports the starting node
 * again, then moved one step further.
 *
 * Returns 1 if the node was read successfully, 0 if there are no more
 *         nodes to read, or -1 in case of error
 */
int
xmlTextReaderNext(xmlTextReaderPtr reader) {
    xmlNodePtr start;
    int status;

    if (reader == NULL)
        return(-1);

    start = reader->node;
    /* cases where there is no subtree to jump over: a single Read will do */
    if ((start == NULL) ||
        (start->type != XML_ELEMENT_NODE) ||
        (reader->state == XML_TEXTREADER_END) ||
        (start->_private == (void *)xmlTextReaderIsEmpty))
        return(xmlTextReaderRead(reader));

    /* consume the subtree until the reader comes back to the start node */
    for (;;) {
        status = xmlTextReaderRead(reader);
        if (status != 1)
            return(status);
        if (reader->node == start)
            break;
    }

    /* step past the start node itself */
    return(xmlTextReaderRead(reader));
}
/**
* xmlTextReaderReadInnerXml:
* @reader: the xmlTextReaderPtr used