mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
added the same htmlRead APIs than their XML counterparts new parser
* HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h: added the same htmlRead APIs than their XML counterparts * include/libxml/parser.h: new parser options, not yet implemented, added an options field to the context. * tree.c: patch from Shaun McCance to fix bug #123238 when ]]> is found within a cdata section. * result/noent/cdata2 result/cdata2 result/cdata2.rdr result/cdata2.sax test/cdata2: add one more cdata test Daniel
This commit is contained in:
parent
60942def6a
commit
9475a352bd
11
ChangeLog
11
ChangeLog
@ -1,3 +1,14 @@
|
|||||||
|
Fri Sep 26 14:41:53 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
|
* HTMLparser.c testHTML.c xmllint.c include/libxml/HTMLparser.h:
|
||||||
|
added the same htmlRead APIs than their XML counterparts
|
||||||
|
* include/libxml/parser.h: new parser options, not yet implemented,
|
||||||
|
added an options field to the context.
|
||||||
|
* tree.c: patch from Shaun McCance to fix bug #123238 when ]]>
|
||||||
|
is found within a cdata section.
|
||||||
|
* result/noent/cdata2 result/cdata2 result/cdata2.rdr
|
||||||
|
result/cdata2.sax test/cdata2: add one more cdata test
|
||||||
|
|
||||||
Thu Sep 25 23:03:23 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
Thu Sep 25 23:03:23 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
* parser.c xmllint.c doc/libxml2-api.xml include/libxml/parser.h:
|
* parser.c xmllint.c doc/libxml2-api.xml include/libxml/parser.h:
|
||||||
|
525
HTMLparser.c
525
HTMLparser.c
@ -5541,4 +5541,529 @@ htmlNodeStatus(const htmlNodePtr node, int legacy) {
|
|||||||
default: return HTML_NA ;
|
default: return HTML_NA ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/************************************************************************
|
||||||
|
* *
|
||||||
|
* New set (2.6.0) of simpler and more flexible APIs *
|
||||||
|
* *
|
||||||
|
************************************************************************/
|
||||||
|
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. Nothing is done when @str is NULL. When no dictionary
 * is set (dict is NULL) the string is always released with xmlFree().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in unbraced if/else constructs;
 * callers terminate it with their own semicolon.
 */
#define DICT_FREE(str)							\
    do {								\
	if ((str) && ((!dict) ||					\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))		\
	    xmlFree((char *)(str));					\
    } while (0)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlCtxtReset:
|
||||||
|
* @ctxt: an XML parser context
|
||||||
|
*
|
||||||
|
* Reset a parser context
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
htmlCtxtReset(htmlParserCtxtPtr ctxt)
|
||||||
|
{
|
||||||
|
xmlParserInputPtr input;
|
||||||
|
xmlDictPtr dict = ctxt->dict;
|
||||||
|
|
||||||
|
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
|
||||||
|
xmlFreeInputStream(input);
|
||||||
|
}
|
||||||
|
ctxt->inputNr = 0;
|
||||||
|
ctxt->input = NULL;
|
||||||
|
|
||||||
|
ctxt->spaceNr = 0;
|
||||||
|
ctxt->spaceTab[0] = -1;
|
||||||
|
ctxt->space = &ctxt->spaceTab[0];
|
||||||
|
|
||||||
|
|
||||||
|
ctxt->nodeNr = 0;
|
||||||
|
ctxt->node = NULL;
|
||||||
|
|
||||||
|
ctxt->nameNr = 0;
|
||||||
|
ctxt->name = NULL;
|
||||||
|
|
||||||
|
DICT_FREE(ctxt->version);
|
||||||
|
ctxt->version = NULL;
|
||||||
|
DICT_FREE(ctxt->encoding);
|
||||||
|
ctxt->encoding = NULL;
|
||||||
|
DICT_FREE(ctxt->directory);
|
||||||
|
ctxt->directory = NULL;
|
||||||
|
DICT_FREE(ctxt->extSubURI);
|
||||||
|
ctxt->extSubURI = NULL;
|
||||||
|
DICT_FREE(ctxt->extSubSystem);
|
||||||
|
ctxt->extSubSystem = NULL;
|
||||||
|
if (ctxt->myDoc != NULL)
|
||||||
|
xmlFreeDoc(ctxt->myDoc);
|
||||||
|
ctxt->myDoc = NULL;
|
||||||
|
|
||||||
|
ctxt->standalone = -1;
|
||||||
|
ctxt->hasExternalSubset = 0;
|
||||||
|
ctxt->hasPErefs = 0;
|
||||||
|
ctxt->html = 1;
|
||||||
|
ctxt->external = 0;
|
||||||
|
ctxt->instate = XML_PARSER_START;
|
||||||
|
ctxt->token = 0;
|
||||||
|
|
||||||
|
ctxt->wellFormed = 1;
|
||||||
|
ctxt->nsWellFormed = 1;
|
||||||
|
ctxt->valid = 1;
|
||||||
|
ctxt->vctxt.userData = ctxt;
|
||||||
|
ctxt->vctxt.error = xmlParserValidityError;
|
||||||
|
ctxt->vctxt.warning = xmlParserValidityWarning;
|
||||||
|
ctxt->record_info = 0;
|
||||||
|
ctxt->nbChars = 0;
|
||||||
|
ctxt->checkIndex = 0;
|
||||||
|
ctxt->inSubset = 0;
|
||||||
|
ctxt->errNo = XML_ERR_OK;
|
||||||
|
ctxt->depth = 0;
|
||||||
|
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||||
|
ctxt->catalogs = NULL;
|
||||||
|
xmlInitNodeInfoSeq(&ctxt->node_seq);
|
||||||
|
|
||||||
|
if (ctxt->attsDefault != NULL) {
|
||||||
|
xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
|
||||||
|
ctxt->attsDefault = NULL;
|
||||||
|
}
|
||||||
|
if (ctxt->attsSpecial != NULL) {
|
||||||
|
xmlHashFree(ctxt->attsSpecial, NULL);
|
||||||
|
ctxt->attsSpecial = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlCtxtUseOptions:
|
||||||
|
* @ctxt: an HTML parser context
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* Applies the options to the parser context
|
||||||
|
*
|
||||||
|
* Returns 0 in case of success, the set of unknown or unimplemented options
|
||||||
|
* in case of error.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
|
||||||
|
{
|
||||||
|
if (options & HTML_PARSE_NOWARNING) {
|
||||||
|
ctxt->sax->warning = NULL;
|
||||||
|
options -= XML_PARSE_NOWARNING;
|
||||||
|
}
|
||||||
|
if (options & HTML_PARSE_NOERROR) {
|
||||||
|
ctxt->sax->error = NULL;
|
||||||
|
ctxt->sax->fatalError = NULL;
|
||||||
|
options -= XML_PARSE_NOERROR;
|
||||||
|
}
|
||||||
|
if (options & HTML_PARSE_PEDANTIC) {
|
||||||
|
ctxt->pedantic = 1;
|
||||||
|
options -= XML_PARSE_PEDANTIC;
|
||||||
|
} else
|
||||||
|
ctxt->pedantic = 0;
|
||||||
|
if (options & XML_PARSE_NOBLANKS) {
|
||||||
|
ctxt->keepBlanks = 0;
|
||||||
|
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
||||||
|
options -= XML_PARSE_NOBLANKS;
|
||||||
|
} else
|
||||||
|
ctxt->keepBlanks = 1;
|
||||||
|
ctxt->dictNames = 0;
|
||||||
|
return (options);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlDoRead:
|
||||||
|
* @ctxt: an HTML parser context
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
* @reuse: keep the context for reuse
|
||||||
|
*
|
||||||
|
* Common front-end for the htmlRead functions
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree or NULL
|
||||||
|
*/
|
||||||
|
static htmlDocPtr
|
||||||
|
htmlDoRead(htmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
|
||||||
|
int options, int reuse)
|
||||||
|
{
|
||||||
|
htmlDocPtr ret;
|
||||||
|
|
||||||
|
htmlCtxtUseOptions(ctxt, options);
|
||||||
|
ctxt->html = 1;
|
||||||
|
if (encoding != NULL) {
|
||||||
|
xmlCharEncodingHandlerPtr hdlr;
|
||||||
|
|
||||||
|
hdlr = xmlFindCharEncodingHandler(encoding);
|
||||||
|
if (hdlr != NULL)
|
||||||
|
xmlSwitchToEncoding(ctxt, hdlr);
|
||||||
|
}
|
||||||
|
if ((URL != NULL) && (ctxt->input != NULL) &&
|
||||||
|
(ctxt->input->filename == NULL))
|
||||||
|
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
|
||||||
|
htmlParseDocument(ctxt);
|
||||||
|
ret = ctxt->myDoc;
|
||||||
|
ctxt->myDoc = NULL;
|
||||||
|
if (!reuse) {
|
||||||
|
if ((ctxt->dictNames) &&
|
||||||
|
(ret != NULL) &&
|
||||||
|
(ret->dict == ctxt->dict))
|
||||||
|
ctxt->dict = NULL;
|
||||||
|
xmlFreeParserCtxt(ctxt);
|
||||||
|
} else {
|
||||||
|
/* Must duplicate the reference to the dictionary */
|
||||||
|
if ((ctxt->dictNames) &&
|
||||||
|
(ret != NULL) &&
|
||||||
|
(ret->dict == ctxt->dict))
|
||||||
|
xmlDictReference(ctxt->dict);
|
||||||
|
}
|
||||||
|
return (ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlReadDoc:
|
||||||
|
* @cur: a pointer to a zero terminated string
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an XML in-memory document and build a tree.
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
|
||||||
|
{
|
||||||
|
htmlParserCtxtPtr ctxt;
|
||||||
|
|
||||||
|
if (cur == NULL)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
ctxt = xmlCreateDocParserCtxt(cur);
|
||||||
|
if (ctxt == NULL)
|
||||||
|
return (NULL);
|
||||||
|
return (htmlDoRead(ctxt, URL, encoding, options, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlReadFile:
|
||||||
|
* @filename: a file or URL
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an XML file from the filesystem or the network.
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlReadFile(const char *filename, const char *encoding, int options)
|
||||||
|
{
|
||||||
|
htmlParserCtxtPtr ctxt;
|
||||||
|
|
||||||
|
ctxt = htmlCreateFileParserCtxt(filename, encoding);
|
||||||
|
if (ctxt == NULL)
|
||||||
|
return (NULL);
|
||||||
|
return (htmlDoRead(ctxt, NULL, NULL, options, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlReadMemory:
|
||||||
|
* @buffer: a pointer to a char array
|
||||||
|
* @size: the size of the array
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an XML in-memory document and build a tree.
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
|
||||||
|
{
|
||||||
|
htmlParserCtxtPtr ctxt;
|
||||||
|
|
||||||
|
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
|
||||||
|
if (ctxt == NULL)
|
||||||
|
return (NULL);
|
||||||
|
return (htmlDoRead(ctxt, URL, encoding, options, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlReadFd:
|
||||||
|
* @fd: an open file descriptor
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an XML from a file descriptor and build a tree.
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlReadFd(int fd, const char *URL, const char *encoding, int options)
|
||||||
|
{
|
||||||
|
htmlParserCtxtPtr ctxt;
|
||||||
|
xmlParserInputBufferPtr input;
|
||||||
|
xmlParserInputPtr stream;
|
||||||
|
|
||||||
|
if (fd < 0)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (input == NULL)
|
||||||
|
return (NULL);
|
||||||
|
ctxt = xmlNewParserCtxt();
|
||||||
|
if (ctxt == NULL) {
|
||||||
|
xmlFreeParserInputBuffer(input);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (stream == NULL) {
|
||||||
|
xmlFreeParserInputBuffer(input);
|
||||||
|
xmlFreeParserCtxt(ctxt);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
inputPush(ctxt, stream);
|
||||||
|
return (htmlDoRead(ctxt, URL, encoding, options, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlReadIO:
|
||||||
|
* @ioread: an I/O read function
|
||||||
|
* @ioclose: an I/O close function
|
||||||
|
* @ioctx: an I/O handler
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an HTML document from I/O functions and source and build a tree.
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
|
||||||
|
void *ioctx, const char *URL, const char *encoding, int options)
|
||||||
|
{
|
||||||
|
htmlParserCtxtPtr ctxt;
|
||||||
|
xmlParserInputBufferPtr input;
|
||||||
|
xmlParserInputPtr stream;
|
||||||
|
|
||||||
|
if (ioread == NULL)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
|
||||||
|
XML_CHAR_ENCODING_NONE);
|
||||||
|
if (input == NULL)
|
||||||
|
return (NULL);
|
||||||
|
ctxt = xmlNewParserCtxt();
|
||||||
|
if (ctxt == NULL) {
|
||||||
|
xmlFreeParserInputBuffer(input);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (stream == NULL) {
|
||||||
|
xmlFreeParserInputBuffer(input);
|
||||||
|
xmlFreeParserCtxt(ctxt);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
inputPush(ctxt, stream);
|
||||||
|
return (htmlDoRead(ctxt, URL, encoding, options, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlCtxtReadDoc:
|
||||||
|
* @ctxt: an HTML parser context
|
||||||
|
* @cur: a pointer to a zero terminated string
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an XML in-memory document and build a tree.
|
||||||
|
* This reuses the existing @ctxt parser context
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
|
||||||
|
const char *URL, const char *encoding, int options)
|
||||||
|
{
|
||||||
|
xmlParserInputPtr stream;
|
||||||
|
|
||||||
|
if (cur == NULL)
|
||||||
|
return (NULL);
|
||||||
|
if (ctxt == NULL)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
htmlCtxtReset(ctxt);
|
||||||
|
|
||||||
|
stream = xmlNewStringInputStream(ctxt, cur);
|
||||||
|
if (stream == NULL) {
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
inputPush(ctxt, stream);
|
||||||
|
return (htmlDoRead(ctxt, URL, encoding, options, 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlCtxtReadFile:
|
||||||
|
* @ctxt: an HTML parser context
|
||||||
|
* @filename: a file or URL
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an XML file from the filesystem or the network.
|
||||||
|
* This reuses the existing @ctxt parser context
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlCtxtReadFile(htmlParserCtxtPtr ctxt, const char *filename,
|
||||||
|
const char *encoding, int options)
|
||||||
|
{
|
||||||
|
xmlParserInputPtr stream;
|
||||||
|
|
||||||
|
if (filename == NULL)
|
||||||
|
return (NULL);
|
||||||
|
if (ctxt == NULL)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
htmlCtxtReset(ctxt);
|
||||||
|
|
||||||
|
stream = xmlNewInputFromFile(ctxt, filename);
|
||||||
|
if (stream == NULL) {
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
inputPush(ctxt, stream);
|
||||||
|
return (htmlDoRead(ctxt, NULL, encoding, options, 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlCtxtReadMemory:
|
||||||
|
* @ctxt: an HTML parser context
|
||||||
|
* @buffer: a pointer to a char array
|
||||||
|
* @size: the size of the array
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an XML in-memory document and build a tree.
|
||||||
|
* This reuses the existing @ctxt parser context
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlCtxtReadMemory(htmlParserCtxtPtr ctxt, const char *buffer, int size,
|
||||||
|
const char *URL, const char *encoding, int options)
|
||||||
|
{
|
||||||
|
xmlParserInputBufferPtr input;
|
||||||
|
xmlParserInputPtr stream;
|
||||||
|
|
||||||
|
if (ctxt == NULL)
|
||||||
|
return (NULL);
|
||||||
|
if (buffer == NULL)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
htmlCtxtReset(ctxt);
|
||||||
|
|
||||||
|
input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (input == NULL) {
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (stream == NULL) {
|
||||||
|
xmlFreeParserInputBuffer(input);
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
inputPush(ctxt, stream);
|
||||||
|
return (htmlDoRead(ctxt, URL, encoding, options, 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlCtxtReadFd:
|
||||||
|
* @ctxt: an HTML parser context
|
||||||
|
* @fd: an open file descriptor
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an XML from a file descriptor and build a tree.
|
||||||
|
* This reuses the existing @ctxt parser context
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlCtxtReadFd(htmlParserCtxtPtr ctxt, int fd,
|
||||||
|
const char *URL, const char *encoding, int options)
|
||||||
|
{
|
||||||
|
xmlParserInputBufferPtr input;
|
||||||
|
xmlParserInputPtr stream;
|
||||||
|
|
||||||
|
if (fd < 0)
|
||||||
|
return (NULL);
|
||||||
|
if (ctxt == NULL)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
htmlCtxtReset(ctxt);
|
||||||
|
|
||||||
|
|
||||||
|
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (input == NULL)
|
||||||
|
return (NULL);
|
||||||
|
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (stream == NULL) {
|
||||||
|
xmlFreeParserInputBuffer(input);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
inputPush(ctxt, stream);
|
||||||
|
return (htmlDoRead(ctxt, URL, encoding, options, 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlCtxtReadIO:
|
||||||
|
* @ctxt: an HTML parser context
|
||||||
|
* @ioread: an I/O read function
|
||||||
|
* @ioclose: an I/O close function
|
||||||
|
* @ioctx: an I/O handler
|
||||||
|
* @URL: the base URL to use for the document
|
||||||
|
* @encoding: the document encoding, or NULL
|
||||||
|
* @options: a combination of htmlParserOption(s)
|
||||||
|
*
|
||||||
|
* parse an HTML document from I/O functions and source and build a tree.
|
||||||
|
* This reuses the existing @ctxt parser context
|
||||||
|
*
|
||||||
|
* Returns the resulting document tree
|
||||||
|
*/
|
||||||
|
htmlDocPtr
|
||||||
|
htmlCtxtReadIO(htmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
|
||||||
|
xmlInputCloseCallback ioclose, void *ioctx,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding, int options)
|
||||||
|
{
|
||||||
|
xmlParserInputBufferPtr input;
|
||||||
|
xmlParserInputPtr stream;
|
||||||
|
|
||||||
|
if (ioread == NULL)
|
||||||
|
return (NULL);
|
||||||
|
if (ctxt == NULL)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
htmlCtxtReset(ctxt);
|
||||||
|
|
||||||
|
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
|
||||||
|
XML_CHAR_ENCODING_NONE);
|
||||||
|
if (input == NULL)
|
||||||
|
return (NULL);
|
||||||
|
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
|
||||||
|
if (stream == NULL) {
|
||||||
|
xmlFreeParserInputBuffer(input);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
inputPush(ctxt, stream);
|
||||||
|
return (htmlDoRead(ctxt, URL, encoding, options, 1));
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* LIBXML_HTML_ENABLED */
|
#endif /* LIBXML_HTML_ENABLED */
|
||||||
|
@ -154,6 +154,88 @@ XMLPUBFUN int XMLCALL
|
|||||||
int size,
|
int size,
|
||||||
int terminate);
|
int terminate);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* New set of simpler/more flexible APIs
|
||||||
|
*/
|
||||||
|
/**
|
||||||
|
* htmlParserOption:
|
||||||
|
*
|
||||||
|
* This is the set of HTML parser options that can be passed down
|
||||||
|
* to the htmlReadDoc() and similar calls.
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
|
||||||
|
HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
|
||||||
|
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */
|
||||||
|
HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */
|
||||||
|
HTML_PARSE_NONET = 1<<11 /* Forbid network access */
|
||||||
|
} htmlParserOption;
|
||||||
|
|
||||||
|
XMLPUBFUN void XMLCALL
|
||||||
|
htmlCtxtReset (htmlParserCtxtPtr ctxt);
|
||||||
|
XMLPUBFUN int XMLCALL
|
||||||
|
htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlReadDoc (const xmlChar *cur,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlReadFile (const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlReadMemory (const char *buffer,
|
||||||
|
int size,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlReadFd (int fd,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlReadIO (xmlInputReadCallback ioread,
|
||||||
|
xmlInputCloseCallback ioclose,
|
||||||
|
void *ioctx,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
|
||||||
|
const xmlChar *cur,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
|
||||||
|
const char *filename,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
|
||||||
|
const char *buffer,
|
||||||
|
int size,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
|
||||||
|
int fd,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
XMLPUBFUN htmlDocPtr XMLCALL
|
||||||
|
htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
|
||||||
|
xmlInputReadCallback ioread,
|
||||||
|
xmlInputCloseCallback ioclose,
|
||||||
|
void *ioctx,
|
||||||
|
const char *URL,
|
||||||
|
const char *encoding,
|
||||||
|
int options);
|
||||||
|
|
||||||
/* NRK/Jan2003: further knowledge of HTML structure
|
/* NRK/Jan2003: further knowledge of HTML structure
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -262,15 +262,16 @@ struct _xmlParserCtxt {
|
|||||||
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
|
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
|
||||||
xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
|
xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
|
||||||
int nsWellFormed; /* is the document XML Nanespace okay */
|
int nsWellFormed; /* is the document XML Nanespace okay */
|
||||||
|
int options; /* Extra options */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Those fields are needed only for treaming parsing so far
|
* Those fields are needed only for treaming parsing so far
|
||||||
*/
|
*/
|
||||||
int dictNames; /* Use dictionary names for the tree */
|
int dictNames; /* Use dictionary names for the tree */
|
||||||
int freeElemsNr; /* number of freed element nodes */
|
int freeElemsNr; /* number of freed element nodes */
|
||||||
xmlNodePtr freeElems; /* List of freed element nodes */
|
xmlNodePtr freeElems; /* List of freed element nodes */
|
||||||
int freeAttrsNr; /* number of freed attributes nodes */
|
int freeAttrsNr; /* number of freed attributes nodes */
|
||||||
xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
|
xmlAttrPtr freeAttrs; /* List of freed attributes nodes */
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1045,7 +1046,9 @@ typedef enum {
|
|||||||
XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */
|
XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */
|
||||||
XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */
|
XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitition */
|
||||||
XML_PARSE_NONET = 1<<11,/* Forbid network access */
|
XML_PARSE_NONET = 1<<11,/* Forbid network access */
|
||||||
XML_PARSE_NODICT = 1<<12 /* Do not reuse the context dictionnary */
|
XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */
|
||||||
|
XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */
|
||||||
|
XML_PARSE_NOCDATA = 1<<14 /* merge CDATA as text nodes */
|
||||||
} xmlParserOption;
|
} xmlParserOption;
|
||||||
|
|
||||||
XMLPUBFUN void XMLCALL
|
XMLPUBFUN void XMLCALL
|
||||||
|
6
result/cdata2
Normal file
6
result/cdata2
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<collection>
|
||||||
|
<test><![CDATA[
|
||||||
|
<![CDATA[abc]]]>]><![CDATA[
|
||||||
|
]]></test>
|
||||||
|
</collection>
|
13
result/cdata2.rdr
Normal file
13
result/cdata2.rdr
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
0 1 collection 0 0
|
||||||
|
1 14 #text 0 1
|
||||||
|
|
||||||
|
1 1 test 0 0
|
||||||
|
2 4 #cdata-section 0 1
|
||||||
|
<![CDATA[abc]
|
||||||
|
2 3 #text 0 1 ]>
|
||||||
|
2 4 #cdata-section 0 1
|
||||||
|
|
||||||
|
1 15 test 0 0
|
||||||
|
1 14 #text 0 1
|
||||||
|
|
||||||
|
0 15 collection 0 0
|
18
result/cdata2.sax
Normal file
18
result/cdata2.sax
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
SAX.setDocumentLocator()
|
||||||
|
SAX.startDocument()
|
||||||
|
SAX.startElement(collection)
|
||||||
|
SAX.characters(
|
||||||
|
, 3)
|
||||||
|
SAX.startElement(test)
|
||||||
|
SAX.pcdata(
|
||||||
|
<![CDATA[abc], 18)
|
||||||
|
SAX.characters(], 1)
|
||||||
|
SAX.getEntity(gt)
|
||||||
|
SAX.characters(>, 1)
|
||||||
|
SAX.pcdata(
|
||||||
|
, 3)
|
||||||
|
SAX.endElement(test)
|
||||||
|
SAX.characters(
|
||||||
|
, 1)
|
||||||
|
SAX.endElement(collection)
|
||||||
|
SAX.endDocument()
|
6
result/noent/cdata2
Normal file
6
result/noent/cdata2
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<collection>
|
||||||
|
<test><![CDATA[
|
||||||
|
<![CDATA[abc]]]>]><![CDATA[
|
||||||
|
]]></test>
|
||||||
|
</collection>
|
6
test/cdata2
Normal file
6
test/cdata2
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<collection>
|
||||||
|
<test><![CDATA[
|
||||||
|
<![CDATA[abc]]]>]><![CDATA[
|
||||||
|
]]></test>
|
||||||
|
</collection>
|
@ -46,6 +46,7 @@ static int repeat = 0;
|
|||||||
static int noout = 0;
|
static int noout = 0;
|
||||||
static int push = 0;
|
static int push = 0;
|
||||||
static char *encoding = NULL;
|
static char *encoding = NULL;
|
||||||
|
static int options = 0;
|
||||||
|
|
||||||
xmlSAXHandler emptySAXHandlerStruct = {
|
xmlSAXHandler emptySAXHandlerStruct = {
|
||||||
NULL, /* internalSubset */
|
NULL, /* internalSubset */
|
||||||
@ -725,7 +726,7 @@ parseAndPrintFile(char *filename) {
|
|||||||
fclose(f);
|
fclose(f);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
doc = htmlParseFile(filename, NULL);
|
doc = htmlReadFile(filename, NULL, options);
|
||||||
}
|
}
|
||||||
if (doc == NULL) {
|
if (doc == NULL) {
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
|
66
tree.c
66
tree.c
@ -7273,6 +7273,7 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
xmlNodePtr cur, int level, int format, const char *encoding) {
|
xmlNodePtr cur, int level, int format, const char *encoding) {
|
||||||
int i;
|
int i;
|
||||||
xmlNodePtr tmp;
|
xmlNodePtr tmp;
|
||||||
|
xmlChar *start, *end;
|
||||||
|
|
||||||
if (cur == NULL) {
|
if (cur == NULL) {
|
||||||
#ifdef DEBUG_TREE
|
#ifdef DEBUG_TREE
|
||||||
@ -7356,10 +7357,22 @@ xmlNodeDumpOutputInternal(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (cur->type == XML_CDATA_SECTION_NODE) {
|
if (cur->type == XML_CDATA_SECTION_NODE) {
|
||||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
start = end = cur->content;
|
||||||
if (cur->content != NULL)
|
while (*end != '\0') {
|
||||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
if ((*end == ']') && (*(end + 1) == ']') && (*(end + 2) == '>')) {
|
||||||
xmlOutputBufferWriteString(buf, "]]>");
|
end = end + 2;
|
||||||
|
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||||
|
xmlOutputBufferWrite(buf, end - start, (const char *)start);
|
||||||
|
xmlOutputBufferWriteString(buf, "]]>");
|
||||||
|
start = end;
|
||||||
|
}
|
||||||
|
end++;
|
||||||
|
}
|
||||||
|
if (start != end) {
|
||||||
|
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||||
|
xmlOutputBufferWriteString(buf, (const char *)start);
|
||||||
|
xmlOutputBufferWriteString(buf, "]]>");
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (cur->type == XML_ATTRIBUTE_NODE) {
|
if (cur->type == XML_ATTRIBUTE_NODE) {
|
||||||
@ -7810,6 +7823,7 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
|||||||
int level, int format, const char *encoding) {
|
int level, int format, const char *encoding) {
|
||||||
int i;
|
int i;
|
||||||
xmlNodePtr tmp;
|
xmlNodePtr tmp;
|
||||||
|
xmlChar *start, *end;
|
||||||
|
|
||||||
if (cur == NULL) {
|
if (cur == NULL) {
|
||||||
#ifdef DEBUG_TREE
|
#ifdef DEBUG_TREE
|
||||||
@ -7893,10 +7907,22 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (cur->type == XML_CDATA_SECTION_NODE) {
|
if (cur->type == XML_CDATA_SECTION_NODE) {
|
||||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
start = end = cur->content;
|
||||||
if (cur->content != NULL)
|
while (*end != '\0') {
|
||||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
if (*end == ']' && *(end + 1) == ']' && *(end + 2) == '>') {
|
||||||
xmlOutputBufferWriteString(buf, "]]>");
|
end = end + 2;
|
||||||
|
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||||
|
xmlOutputBufferWrite(buf, end - start, (const char *)start);
|
||||||
|
xmlOutputBufferWriteString(buf, "]]>");
|
||||||
|
start = end;
|
||||||
|
}
|
||||||
|
end++;
|
||||||
|
}
|
||||||
|
if (start != end) {
|
||||||
|
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||||
|
xmlOutputBufferWriteString(buf, (const char *)start);
|
||||||
|
xmlOutputBufferWriteString(buf, "]]>");
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7989,11 +8015,25 @@ xhtmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
|
|||||||
(xmlStrchr(child->content, '&') == NULL)) {
|
(xmlStrchr(child->content, '&') == NULL)) {
|
||||||
xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
|
xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
|
||||||
} else {
|
} else {
|
||||||
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
start = end = child->content;
|
||||||
if (child->content != NULL)
|
while (*end != '\0') {
|
||||||
xmlOutputBufferWriteString(buf,
|
if (*end == ']' &&
|
||||||
(const char *)child->content);
|
*(end + 1) == ']' &&
|
||||||
xmlOutputBufferWriteString(buf, "]]>");
|
*(end + 2) == '>') {
|
||||||
|
end = end + 2;
|
||||||
|
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||||
|
xmlOutputBufferWrite(buf, end - start,
|
||||||
|
(const char *)start);
|
||||||
|
xmlOutputBufferWriteString(buf, "]]>");
|
||||||
|
start = end;
|
||||||
|
}
|
||||||
|
end++;
|
||||||
|
}
|
||||||
|
if (start != end) {
|
||||||
|
xmlOutputBufferWriteString(buf, "<![CDATA[");
|
||||||
|
xmlOutputBufferWriteString(buf, (const char *)start);
|
||||||
|
xmlOutputBufferWriteString(buf, "]]>");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
|
xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
|
||||||
|
@ -764,7 +764,7 @@ static void parseAndPrintFile(char *filename, xmlParserCtxtPtr rectxt) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (html) {
|
else if (html) {
|
||||||
doc = htmlParseFile(filename, NULL);
|
doc = htmlReadFile(filename, NULL, options);
|
||||||
}
|
}
|
||||||
#endif /* LIBXML_HTML_ENABLED */
|
#endif /* LIBXML_HTML_ENABLED */
|
||||||
else {
|
else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user