2005-09-12 13:10:09 +00:00
< html >
2022-08-24 14:34:04 +02:00
< head >
< meta http-equiv = "Content-Type" content = "text/html; charset=UTF-8" >
< title > HTMLparser: interface for an HTML 4.0 non-verifying parser< / title >
< meta name = "generator" content = "Libxml2 devhelp stylesheet" >
< link rel = "start" href = "index.html" title = "libxml2 Reference Manual" >
< link rel = "up" href = "general.html" title = "API" >
< link rel = "stylesheet" href = "style.css" type = "text/css" >
< link rel = "chapter" href = "general.html" title = "API" >
< / head >
< body bgcolor = "white" text = "black" link = "#0000FF" vlink = "#840084" alink = "#0000FF" >
< table class = "navigation" width = "100%" summary = "Navigation header" cellpadding = "2" cellspacing = "2" > < tr valign = "middle" >
< td > < a accesskey = "u" href = "general.html" > < img src = "up.png" width = "24" height = "24" border = "0" alt = "Up" > < / a > < / td >
< td > < a accesskey = "h" href = "index.html" > < img src = "home.png" width = "24" height = "24" border = "0" alt = "Home" > < / a > < / td >
< td > < a accesskey = "n" href = "libxml2-HTMLtree.html" > < img src = "right.png" width = "24" height = "24" border = "0" alt = "Next" > < / a > < / td >
< th width = "100%" align = "center" > libxml2 Reference Manual< / th >
< / tr > < / table >
< h2 > < span class = "refentrytitle" > HTMLparser< / span > < / h2 >
< p > HTMLparser - interface for an HTML 4.0 non-verifying parser< / p >
< p > This module implements an HTML 4.0 non-verifying parser with an API compatible with that of the XML parser. It should be able to parse "real world" HTML, even if severely broken from a specification point of view. < / p >
< p > Author(s): Daniel Veillard < / p >
< div class = "refsynopsisdiv" >
< h2 > Synopsis< / h2 >
< pre class = "synopsis" > #define < a href = "#htmlDefaultSubelement" > htmlDefaultSubelement< / a > (elt);
2005-09-12 13:10:09 +00:00
#define < a href = "#htmlElementAllowedHereDesc" > htmlElementAllowedHereDesc< / a > (parent, elt);
#define < a href = "#htmlRequiredAttrs" > htmlRequiredAttrs< / a > (elt);
typedef < a href = "libxml2-tree.html#xmlDocPtr" > xmlDocPtr< / a > < a href = "#htmlDocPtr" > htmlDocPtr< / a > ;
typedef struct _htmlElemDesc < a href = "#htmlElemDesc" > htmlElemDesc< / a > ;
2022-08-19 11:28:49 +02:00
typedef < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * < a href = "#htmlElemDescPtr" > htmlElemDescPtr< / a > ;
typedef struct _htmlEntityDesc < a href = "#htmlEntityDesc" > htmlEntityDesc< / a > ;
2005-09-12 13:10:09 +00:00
typedef < a href = "libxml2-HTMLparser.html#htmlEntityDesc" > htmlEntityDesc< / a > * < a href = "#htmlEntityDescPtr" > htmlEntityDescPtr< / a > ;
2022-08-19 11:28:49 +02:00
typedef < a href = "libxml2-tree.html#xmlNodePtr" > xmlNodePtr< / a > < a href = "#htmlNodePtr" > htmlNodePtr< / a > ;
2005-09-12 13:10:09 +00:00
typedef < a href = "libxml2-tree.html#xmlParserCtxt" > xmlParserCtxt< / a > < a href = "#htmlParserCtxt" > htmlParserCtxt< / a > ;
2022-08-19 11:28:49 +02:00
typedef < a href = "libxml2-tree.html#xmlParserCtxtPtr" > xmlParserCtxtPtr< / a > < a href = "#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ;
typedef < a href = "libxml2-tree.html#xmlParserInput" > xmlParserInput< / a > < a href = "#htmlParserInput" > htmlParserInput< / a > ;
typedef < a href = "libxml2-tree.html#xmlParserInputPtr" > xmlParserInputPtr< / a > < a href = "#htmlParserInputPtr" > htmlParserInputPtr< / a > ;
typedef < a href = "libxml2-parser.html#xmlParserNodeInfo" > xmlParserNodeInfo< / a > < a href = "#htmlParserNodeInfo" > htmlParserNodeInfo< / a > ;
typedef enum < a href = "#htmlParserOption" > htmlParserOption< / a > ;
typedef < a href = "libxml2-tree.html#xmlSAXHandler" > xmlSAXHandler< / a > < a href = "#htmlSAXHandler" > htmlSAXHandler< / a > ;
typedef < a href = "libxml2-tree.html#xmlSAXHandlerPtr" > xmlSAXHandlerPtr< / a > < a href = "#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > ;
typedef enum < a href = "#htmlStatus" > htmlStatus< / a > ;
2022-08-24 14:34:04 +02:00
int < a href = "#UTF8ToHtml" > UTF8ToHtml< / a > (unsigned char * out, < br > int * outlen, < br > const unsigned char * in, < br > int * inlen);
< a href = "libxml2-HTMLparser.html#htmlStatus" > htmlStatus< / a > < a href = "#htmlAttrAllowed" > htmlAttrAllowed< / a > (const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * elt, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * attr, < br > int legacy);
int < a href = "#htmlAutoCloseTag" > htmlAutoCloseTag< / a > (< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > doc, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * name, < br > < a href = "libxml2-HTMLparser.html#htmlNodePtr" > htmlNodePtr< / a > elem);
< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > < a href = "#htmlCreateMemoryParserCtxt" > htmlCreateMemoryParserCtxt< / a > (const char * buffer, < br > int size);
< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > < a href = "#htmlCreatePushParserCtxt" > htmlCreatePushParserCtxt< / a > (< a href = "libxml2-HTMLparser.html#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > sax, < br > void * user_data, < br > const char * chunk, < br > int size, < br > const char * filename, < br > < a href = "libxml2-encoding.html#xmlCharEncoding" > xmlCharEncoding< / a > enc);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlCtxtReadDoc" > htmlCtxtReadDoc< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * cur, < br > const char * URL, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlCtxtReadFd" > htmlCtxtReadFd< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > int fd, < br > const char * URL, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlCtxtReadFile" > htmlCtxtReadFile< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const char * filename, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlCtxtReadIO" > htmlCtxtReadIO< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > < a href = "libxml2-xmlIO.html#xmlInputReadCallback" > xmlInputReadCallback< / a > ioread, < br > < a href = "libxml2-xmlIO.html#xmlInputCloseCallback" > xmlInputCloseCallback< / a > ioclose, < br > void * ioctx, < br > const char * URL, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlCtxtReadMemory" > htmlCtxtReadMemory< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const char * buffer, < br > int size, < br > const char * URL, < br > const char * encoding, < br > int options);
2005-09-12 13:10:09 +00:00
void < a href = "#htmlCtxtReset" > htmlCtxtReset< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt);
2022-08-24 14:34:04 +02:00
int < a href = "#htmlCtxtUseOptions" > htmlCtxtUseOptions< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > int options);
int < a href = "#htmlElementAllowedHere" > htmlElementAllowedHere< / a > (const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * parent, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * elt);
< a href = "libxml2-HTMLparser.html#htmlStatus" > htmlStatus< / a > < a href = "#htmlElementStatusHere" > htmlElementStatusHere< / a > (const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * parent, < br > const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * elt);
int < a href = "#htmlEncodeEntities" > htmlEncodeEntities< / a > (unsigned char * out, < br > int * outlen, < br > const unsigned char * in, < br > int * inlen, < br > int quoteChar);
2005-09-12 13:10:09 +00:00
const < a href = "libxml2-HTMLparser.html#htmlEntityDesc" > htmlEntityDesc< / a > * < a href = "#htmlEntityLookup" > htmlEntityLookup< / a > (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * name);
2022-08-19 11:28:49 +02:00
const < a href = "libxml2-HTMLparser.html#htmlEntityDesc" > htmlEntityDesc< / a > * < a href = "#htmlEntityValueLookup" > htmlEntityValueLookup< / a > (unsigned int value);
2005-09-12 13:10:09 +00:00
void < a href = "#htmlFreeParserCtxt" > htmlFreeParserCtxt< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt);
2022-08-19 11:28:49 +02:00
int < a href = "#htmlHandleOmittedElem" > htmlHandleOmittedElem< / a > (int val);
2022-08-24 14:34:04 +02:00
int < a href = "#htmlIsAutoClosed" > htmlIsAutoClosed< / a > (< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > doc, < br > < a href = "libxml2-HTMLparser.html#htmlNodePtr" > htmlNodePtr< / a > elem);
2022-08-19 11:28:49 +02:00
int < a href = "#htmlIsScriptAttribute" > htmlIsScriptAttribute< / a > (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * name);
2006-10-11 16:43:06 +00:00
< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > < a href = "#htmlNewParserCtxt" > htmlNewParserCtxt< / a > (void);
2022-08-24 14:34:04 +02:00
< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > < a href = "#htmlNewSAXParserCtxt" > htmlNewSAXParserCtxt< / a > (< a href = "libxml2-HTMLparser.html#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > sax, < br > void * userData);
< a href = "libxml2-HTMLparser.html#htmlStatus" > htmlStatus< / a > < a href = "#htmlNodeStatus" > htmlNodeStatus< / a > (const < a href = "libxml2-HTMLparser.html#htmlNodePtr" > htmlNodePtr< / a > node, < br > int legacy);
2022-08-19 11:28:49 +02:00
int < a href = "#htmlParseCharRef" > htmlParseCharRef< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt);
2022-08-24 14:34:04 +02:00
int < a href = "#htmlParseChunk" > htmlParseChunk< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const char * chunk, < br > int size, < br > int terminate);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlParseDoc" > htmlParseDoc< / a > (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * cur, < br > const char * encoding);
2022-08-19 11:28:49 +02:00
int < a href = "#htmlParseDocument" > htmlParseDocument< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt);
void < a href = "#htmlParseElement" > htmlParseElement< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt);
2022-08-24 14:34:04 +02:00
const < a href = "libxml2-HTMLparser.html#htmlEntityDesc" > htmlEntityDesc< / a > * < a href = "#htmlParseEntityRef" > htmlParseEntityRef< / a > (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > ** str);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlParseFile" > htmlParseFile< / a > (const char * filename, < br > const char * encoding);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlReadDoc" > htmlReadDoc< / a > (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * cur, < br > const char * URL, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlReadFd" > htmlReadFd< / a > (int fd, < br > const char * URL, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlReadFile" > htmlReadFile< / a > (const char * filename, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlReadIO" > htmlReadIO< / a > (< a href = "libxml2-xmlIO.html#xmlInputReadCallback" > xmlInputReadCallback< / a > ioread, < br > < a href = "libxml2-xmlIO.html#xmlInputCloseCallback" > xmlInputCloseCallback< / a > ioclose, < br > void * ioctx, < br > const char * URL, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlReadMemory" > htmlReadMemory< / a > (const char * buffer, < br > int size, < br > const char * URL, < br > const char * encoding, < br > int options);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlSAXParseDoc" > htmlSAXParseDoc< / a > (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * cur, < br > const char * encoding, < br > < a href = "libxml2-HTMLparser.html#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > sax, < br > void * userData);
< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > < a href = "#htmlSAXParseFile" > htmlSAXParseFile< / a > (const char * filename, < br > const char * encoding, < br > < a href = "libxml2-HTMLparser.html#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > sax, < br > void * userData);
2022-08-19 11:28:49 +02:00
const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * < a href = "#htmlTagLookup" > htmlTagLookup< / a > (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * tag);
2005-09-12 13:10:09 +00:00
< / pre >
2022-08-24 14:34:04 +02:00
< / div >
< div class = "refsect1" lang = "en" > < h2 > Description< / h2 > < / div >
< div class = "refsect1" lang = "en" >
< h2 > Details< / h2 >
< div class = "refsect2" lang = "en" >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlDefaultSubelement" > Macro < / a > htmlDefaultSubelement< / h3 >
< pre class = "programlisting" > #define < a href = "#htmlDefaultSubelement" > htmlDefaultSubelement< / a > (elt);
< / pre >
< p > Returns the default subelement for this element.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody > < tr >
< td > < span class = "term" > < i > < tt > elt< / tt > < / i > :< / span > < / td >
< td > HTML element< / td >
< / tr > < / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlElementAllowedHereDesc" > Macro < / a > htmlElementAllowedHereDesc< / h3 >
< pre class = "programlisting" > #define < a href = "#htmlElementAllowedHereDesc" > htmlElementAllowedHereDesc< / a > (parent, elt);
< / pre >
< p > Checks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > parent< / tt > < / i > :< / span > < / td >
< td > HTML parent element< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > elt< / tt > < / i > :< / span > < / td >
< td > HTML element< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlRequiredAttrs" > Macro < / a > htmlRequiredAttrs< / h3 >
< pre class = "programlisting" > #define < a href = "#htmlRequiredAttrs" > htmlRequiredAttrs< / a > (elt);
< / pre >
< p > Returns the attributes required for the specified element.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody > < tr >
< td > < span class = "term" > < i > < tt > elt< / tt > < / i > :< / span > < / td >
< td > HTML element< / td >
< / tr > < / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlDocPtr" > Typedef < / a > htmlDocPtr< / h3 >
< pre class = "programlisting" > < a href = "libxml2-tree.html#xmlDocPtr" > xmlDocPtr< / a > htmlDocPtr;
< / pre >
< p > < / p >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlElemDesc" > Structure < / a > htmlElemDesc< / h3 >
< pre class = "programlisting" > struct _htmlElemDesc {
2005-09-12 13:10:09 +00:00
const char * name : The tag name
char startTag : Whether the start tag can be implied
char endTag : Whether the end tag can be implied
char saveEndTag : Whether the end tag should be saved
char empty : Is this an empty element ?
char depr : Is this a deprecated element ?
char dtd : 1: only in Loose DTD, 2: only Frameset one
char isinline : is this a block 0 or inline 1 element
const char * desc : the description NRK Jan.2003 * New fields encapsulating HTML structure
const char ** subelts : allowed sub-elements of this element
const char * defaultsubelt : subelement for suggested auto-repair if necessary or NULL
const char ** attrs_opt : Optional Attributes
const char ** attrs_depr : Additional deprecated attributes
const char ** attrs_req : Required attributes
} htmlElemDesc;
2022-08-24 14:34:04 +02:00
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlElemDescPtr" > Typedef < / a > htmlElemDescPtr< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * htmlElemDescPtr;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlEntityDesc" > Structure < / a > htmlEntityDesc< / h3 >
< pre class = "programlisting" > struct _htmlEntityDesc {
2005-09-12 13:10:09 +00:00
unsigned int value : the UNICODE value for the character
const char * name : The entity name
const char * desc : the description
} htmlEntityDesc;
2022-08-24 14:34:04 +02:00
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlEntityDescPtr" > Typedef < / a > htmlEntityDescPtr< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlEntityDesc" > htmlEntityDesc< / a > * htmlEntityDescPtr;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlNodePtr" > Typedef < / a > htmlNodePtr< / h3 >
< pre class = "programlisting" > < a href = "libxml2-tree.html#xmlNodePtr" > xmlNodePtr< / a > htmlNodePtr;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParserCtxt" > Typedef < / a > htmlParserCtxt< / h3 >
< pre class = "programlisting" > < a href = "libxml2-tree.html#xmlParserCtxt" > xmlParserCtxt< / a > htmlParserCtxt;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParserCtxtPtr" > Typedef < / a > htmlParserCtxtPtr< / h3 >
< pre class = "programlisting" > < a href = "libxml2-tree.html#xmlParserCtxtPtr" > xmlParserCtxtPtr< / a > htmlParserCtxtPtr;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParserInput" > Typedef < / a > htmlParserInput< / h3 >
< pre class = "programlisting" > < a href = "libxml2-tree.html#xmlParserInput" > xmlParserInput< / a > htmlParserInput;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParserInputPtr" > Typedef < / a > htmlParserInputPtr< / h3 >
< pre class = "programlisting" > < a href = "libxml2-tree.html#xmlParserInputPtr" > xmlParserInputPtr< / a > htmlParserInputPtr;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParserNodeInfo" > Typedef < / a > htmlParserNodeInfo< / h3 >
< pre class = "programlisting" > < a href = "libxml2-parser.html#xmlParserNodeInfo" > xmlParserNodeInfo< / a > htmlParserNodeInfo;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParserOption" > Enum < / a > htmlParserOption< / h3 >
< pre class = "programlisting" > enum < a href = "#htmlParserOption" > htmlParserOption< / a > {
2005-09-12 13:10:09 +00:00
< a name = "HTML_PARSE_RECOVER" > HTML_PARSE_RECOVER< / a > = 1 /* Relaxed parsing */
2010-09-30 13:58:22 +02:00
< a name = "HTML_PARSE_NODEFDTD" > HTML_PARSE_NODEFDTD< / a > = 4 /* do not default a doctype if not found */
2005-09-12 13:10:09 +00:00
< a name = "HTML_PARSE_NOERROR" > HTML_PARSE_NOERROR< / a > = 32 /* suppress error reports */
< a name = "HTML_PARSE_NOWARNING" > HTML_PARSE_NOWARNING< / a > = 64 /* suppress warning reports */
< a name = "HTML_PARSE_PEDANTIC" > HTML_PARSE_PEDANTIC< / a > = 128 /* pedantic error reporting */
< a name = "HTML_PARSE_NOBLANKS" > HTML_PARSE_NOBLANKS< / a > = 256 /* remove blank nodes */
< a name = "HTML_PARSE_NONET" > HTML_PARSE_NONET< / a > = 2048 /* Forbid network access */
2010-03-15 16:21:00 +01:00
< a name = "HTML_PARSE_NOIMPLIED" > HTML_PARSE_NOIMPLIED< / a > = 8192 /* Do not add implied html/body... elements */
2012-05-23 17:10:59 +08:00
< a name = "HTML_PARSE_COMPACT" > HTML_PARSE_COMPACT< / a > = 65536 /* compact small text nodes */
< a name = "HTML_PARSE_IGNORE_ENC" > HTML_PARSE_IGNORE_ENC< / a > = 2097152 /* ignore internal document encoding hint */
2005-09-12 13:10:09 +00:00
};
2022-08-24 14:34:04 +02:00
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlSAXHandler" > Typedef < / a > htmlSAXHandler< / h3 >
< pre class = "programlisting" > < a href = "libxml2-tree.html#xmlSAXHandler" > xmlSAXHandler< / a > htmlSAXHandler;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlSAXHandlerPtr" > Typedef < / a > htmlSAXHandlerPtr< / h3 >
< pre class = "programlisting" > < a href = "libxml2-tree.html#xmlSAXHandlerPtr" > xmlSAXHandlerPtr< / a > htmlSAXHandlerPtr;
< / pre >
< p > < / p >
2005-09-12 13:10:09 +00:00
< / div >
2022-08-24 14:34:04 +02:00
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlStatus" > Enum < / a > htmlStatus< / h3 >
< pre class = "programlisting" > enum < a href = "#htmlStatus" > htmlStatus< / a > {
2005-09-12 13:10:09 +00:00
< a name = "HTML_NA" > HTML_NA< / a > = 0 /* something we don't check at all */
< a name = "HTML_INVALID" > HTML_INVALID< / a > = 1
< a name = "HTML_DEPRECATED" > HTML_DEPRECATED< / a > = 2
< a name = "HTML_VALID" > HTML_VALID< / a > = 4
< a name = "HTML_REQUIRED" > HTML_REQUIRED< / a > = 12 /* VALID bit set so ( & HTML_VALID ) is TRUE */
};
2022-08-24 14:34:04 +02:00
< / pre >
< p > < / p >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "UTF8ToHtml" > < / a > UTF8ToHtml ()< / h3 >
< pre class = "programlisting" > int UTF8ToHtml (unsigned char * out, < br > int * outlen, < br > const unsigned char * in, < br > int * inlen)< br >
< / pre >
< p > Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > out< / tt > < / i > :< / span > < / td >
< td > a pointer to an array of bytes to store the result< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > outlen< / tt > < / i > :< / span > < / td >
< td > the length of @out< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > in< / tt > < / i > :< / span > < / td >
< td > a pointer to an array of UTF-8 chars< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > inlen< / tt > < / i > :< / span > < / td >
< td > the length of @in< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is non-negative, else unpredictable. The value of @outlen after return is the number of octets written.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlAttrAllowed" > < / a > htmlAttrAllowed ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlStatus" > htmlStatus< / a > htmlAttrAllowed (const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * elt, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * attr, < br > int legacy)< br >
< / pre >
< p > Checks whether an < a href = "libxml2-SAX.html#attribute" > attribute< / a > is valid for an element. Has full knowledge of Required and Deprecated attributes.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > elt< / tt > < / i > :< / span > < / td >
< td > HTML element< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > attr< / tt > < / i > :< / span > < / td >
< td > HTML < a href = "libxml2-SAX.html#attribute" > attribute< / a >
< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > legacy< / tt > < / i > :< / span > < / td >
< td > whether to allow deprecated attributes< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, < a href = "libxml2-HTMLparser.html#HTML_INVALID" > HTML_INVALID< / a >
< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlAutoCloseTag" > < / a > htmlAutoCloseTag ()< / h3 >
< pre class = "programlisting" > int htmlAutoCloseTag (< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > doc, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * name, < br > < a href = "libxml2-HTMLparser.html#htmlNodePtr" > htmlNodePtr< / a > elem)< br >
< / pre >
< p > The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > doc< / tt > < / i > :< / span > < / td >
< td > the HTML document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > name< / tt > < / i > :< / span > < / td >
< td > The tag name< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > elem< / tt > < / i > :< / span > < / td >
< td > the HTML element< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > 1 if autoclose, 0 otherwise< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCreateMemoryParserCtxt" > < / a > htmlCreateMemoryParserCtxt ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > htmlCreateMemoryParserCtxt (const char * buffer, < br > int size)< br >
< / pre >
< p > Create a parser context for an HTML in-memory document.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > buffer< / tt > < / i > :< / span > < / td >
< td > a pointer to a char array< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > size< / tt > < / i > :< / span > < / td >
< td > the size of the array< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the new parser context or NULL< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCreatePushParserCtxt" > < / a > htmlCreatePushParserCtxt ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > htmlCreatePushParserCtxt (< a href = "libxml2-HTMLparser.html#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > sax, < br > void * user_data, < br > const char * chunk, < br > int size, < br > const char * filename, < br > < a href = "libxml2-encoding.html#xmlCharEncoding" > xmlCharEncoding< / a > enc)< br >
< / pre >
< p > Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and error/warning reports.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > sax< / tt > < / i > :< / span > < / td >
< td > a SAX handler< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > user_data< / tt > < / i > :< / span > < / td >
< td > The user data returned on SAX callbacks< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > chunk< / tt > < / i > :< / span > < / td >
< td > a pointer to an array of chars< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > size< / tt > < / i > :< / span > < / td >
< td > number of chars in the array< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > filename< / tt > < / i > :< / span > < / td >
< td > an optional file name or URI< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > enc< / tt > < / i > :< / span > < / td >
< td > an optional encoding< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the new parser context or NULL< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCtxtReadDoc" > < / a > htmlCtxtReadDoc ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlCtxtReadDoc (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * cur, < br > const char * URL, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > cur< / tt > < / i > :< / span > < / td >
< td > a pointer to a zero terminated string< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > URL< / tt > < / i > :< / span > < / td >
< td > the base URL to use for the document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCtxtReadFd" > < / a > htmlCtxtReadFd ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlCtxtReadFd (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > int fd, < br > const char * URL, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > fd< / tt > < / i > :< / span > < / td >
< td > an open file descriptor< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > URL< / tt > < / i > :< / span > < / td >
< td > the base URL to use for the document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCtxtReadFile" > < / a > htmlCtxtReadFile ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlCtxtReadFile (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const char * filename, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > filename< / tt > < / i > :< / span > < / td >
< td > a file or URL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCtxtReadIO" > < / a > htmlCtxtReadIO ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlCtxtReadIO (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > < a href = "libxml2-xmlIO.html#xmlInputReadCallback" > xmlInputReadCallback< / a > ioread, < br > < a href = "libxml2-xmlIO.html#xmlInputCloseCallback" > xmlInputCloseCallback< / a > ioclose, < br > void * ioctx, < br > const char * URL, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > ioread< / tt > < / i > :< / span > < / td >
< td > an I/O read function< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > ioclose< / tt > < / i > :< / span > < / td >
< td > an I/O close function< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > ioctx< / tt > < / i > :< / span > < / td >
< td > an I/O handler< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > URL< / tt > < / i > :< / span > < / td >
< td > the base URL to use for the document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCtxtReadMemory" > < / a > htmlCtxtReadMemory ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlCtxtReadMemory (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const char * buffer, < br > int size, < br > const char * URL, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > buffer< / tt > < / i > :< / span > < / td >
< td > a pointer to a char array< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > size< / tt > < / i > :< / span > < / td >
< td > the size of the array< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > URL< / tt > < / i > :< / span > < / td >
< td > the base URL to use for the document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCtxtReset" > < / a > htmlCtxtReset ()< / h3 >
< pre class = "programlisting" > void htmlCtxtReset (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt)< br >
< / pre >
< p > Reset a parser context< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody > < tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr > < / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlCtxtUseOptions" > < / a > htmlCtxtUseOptions ()< / h3 >
< pre class = "programlisting" > int htmlCtxtUseOptions (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > int options)< br >
< / pre >
< p > Applies the options to the parser context< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > 0 in case of success, the set of unknown or unimplemented options in case of error.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlElementAllowedHere" > < / a > htmlElementAllowedHere ()< / h3 >
< pre class = "programlisting" > int htmlElementAllowedHere (const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * parent, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * elt)< br >
< / pre >
< p > Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > parent< / tt > < / i > :< / span > < / td >
< td > HTML parent element< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > elt< / tt > < / i > :< / span > < / td >
< td > HTML element< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > 1 if allowed; 0 otherwise.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlElementStatusHere" > < / a > htmlElementStatusHere ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlStatus" > htmlStatus< / a > htmlElementStatusHere (const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * parent, < br > const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * elt)< br >
< / pre >
< p > Checks whether an HTML element may be a direct child of a parent element, and if so whether it is valid or deprecated.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > parent< / tt > < / i > :< / span > < / td >
< td > HTML parent element< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > elt< / tt > < / i > :< / span > < / td >
< td > HTML element< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > one of HTML_VALID, HTML_DEPRECATED, < a href = "libxml2-HTMLparser.html#HTML_INVALID" > HTML_INVALID< / a >
< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlEncodeEntities" > < / a > htmlEncodeEntities ()< / h3 >
< pre class = "programlisting" > int htmlEncodeEntities (unsigned char * out, < br > int * outlen, < br > const unsigned char * in, < br > int * inlen, < br > int quoteChar)< br >
< / pre >
< p > Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > out< / tt > < / i > :< / span > < / td >
< td > a pointer to an array of bytes to store the result< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > outlen< / tt > < / i > :< / span > < / td >
< td > the length of @out< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > in< / tt > < / i > :< / span > < / td >
< td > a pointer to an array of UTF-8 chars< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > inlen< / tt > < / i > :< / span > < / td >
< td > the length of @in< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > quoteChar< / tt > < / i > :< / span > < / td >
< td > the quote character to escape (' or ") or zero.< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlEntityLookup" > < / a > htmlEntityLookup ()< / h3 >
< pre class = "programlisting" > const < a href = "libxml2-HTMLparser.html#htmlEntityDesc" > htmlEntityDesc< / a > * htmlEntityLookup (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * name)< br >
< / pre >
< p > Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > name< / tt > < / i > :< / span > < / td >
< td > the entity name< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the associated < a href = "libxml2-HTMLparser.html#htmlEntityDescPtr" > htmlEntityDescPtr< / a > if found, NULL otherwise.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlEntityValueLookup" > < / a > htmlEntityValueLookup ()< / h3 >
< pre class = "programlisting" > const < a href = "libxml2-HTMLparser.html#htmlEntityDesc" > htmlEntityDesc< / a > * htmlEntityValueLookup (unsigned int value)< br >
< / pre >
< p > Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly, a hash table is really needed.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > value< / tt > < / i > :< / span > < / td >
< td > the entity's unicode value< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the associated < a href = "libxml2-HTMLparser.html#htmlEntityDescPtr" > htmlEntityDescPtr< / a > if found, NULL otherwise.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlFreeParserCtxt" > < / a > htmlFreeParserCtxt ()< / h3 >
< pre class = "programlisting" > void htmlFreeParserCtxt (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt)< br >
< / pre >
< p > Free all the memory used by a parser context. However the parsed document in ctxt-&gt;myDoc is not freed.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody > < tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr > < / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlHandleOmittedElem" > < / a > htmlHandleOmittedElem ()< / h3 >
< pre class = "programlisting" > int htmlHandleOmittedElem (int val)< br >
< / pre >
< p > Set and return the previous value for handling HTML omitted tags.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > val< / tt > < / i > :< / span > < / td >
< td > int 0 or 1< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the last value: 0 for no handling, 1 for auto insertion.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlIsAutoClosed" > < / a > htmlIsAutoClosed ()< / h3 >
< pre class = "programlisting" > int htmlIsAutoClosed (< a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > doc, < br > < a href = "libxml2-HTMLparser.html#htmlNodePtr" > htmlNodePtr< / a > elem)< br >
< / pre >
< p > The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks if a tag is autoclosed by one of its children.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > doc< / tt > < / i > :< / span > < / td >
< td > the HTML document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > elem< / tt > < / i > :< / span > < / td >
< td > the HTML element< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > 1 if autoclosed, 0 otherwise< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlIsScriptAttribute" > < / a > htmlIsScriptAttribute ()< / h3 >
< pre class = "programlisting" > int htmlIsScriptAttribute (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * name)< br >
< / pre >
< p > Check if an < a href = "libxml2-SAX.html#attribute" > attribute< / a > is of content type Script< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > name< / tt > < / i > :< / span > < / td >
< td > an < a href = "libxml2-SAX.html#attribute" > attribute< / a > name< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > 1 if the < a href = "libxml2-SAX.html#attribute" > attribute< / a > is a script, 0 otherwise< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlNewParserCtxt" > < / a > htmlNewParserCtxt ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > htmlNewParserCtxt (void)< br >
< / pre >
< p > Allocate and initialize a new parser context.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody > < tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the < a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > or NULL in case of allocation error< / td >
< / tr > < / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlNewSAXParserCtxt" > < / a > htmlNewSAXParserCtxt ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > htmlNewSAXParserCtxt (< a href = "libxml2-HTMLparser.html#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > sax, < br > void * userData)< br >
< / pre >
< p > Allocate and initialize a new parser context.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > sax< / tt > < / i > :< / span > < / td >
< td > SAX handler< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > userData< / tt > < / i > :< / span > < / td >
< td > user data< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the < a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > or NULL in case of allocation error< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlNodeStatus" > < / a > htmlNodeStatus ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlStatus" > htmlStatus< / a > htmlNodeStatus (const < a href = "libxml2-HTMLparser.html#htmlNodePtr" > htmlNodePtr< / a > node, < br > int legacy)< br >
< / pre >
< p > Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > node< / tt > < / i > :< / span > < / td >
< td > an < a href = "libxml2-HTMLparser.html#htmlNodePtr" > htmlNodePtr< / a > in a tree< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > legacy< / tt > < / i > :< / span > < / td >
< td > whether to allow deprecated elements (YES is faster here for Element nodes)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > For Element nodes, a return from < a href = "libxml2-HTMLparser.html#htmlElementAllowedHere" > htmlElementAllowedHere< / a > (if legacy allowed) or < a href = "libxml2-HTMLparser.html#htmlElementStatusHere" > htmlElementStatusHere< / a > (otherwise). For Attribute nodes, a return from < a href = "libxml2-HTMLparser.html#htmlAttrAllowed" > htmlAttrAllowed< / a > . For other nodes, < a href = "libxml2-HTMLparser.html#HTML_NA" > HTML_NA< / a > (no checks performed).< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParseCharRef" > < / a > htmlParseCharRef ()< / h3 >
< pre class = "programlisting" > int htmlParseCharRef (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt)< br >
< / pre >
< p > parse Reference declarations [66] CharRef ::= '&amp;#' [0-9]+ ';' | '&amp;#x' [0-9a-fA-F]+ ';'< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the value parsed (as an int)< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParseChunk" > < / a > htmlParseChunk ()< / h3 >
< pre class = "programlisting" > int htmlParseChunk (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const char * chunk, < br > int size, < br > int terminate)< br >
< / pre >
< p > Parse a Chunk of memory< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > chunk< / tt > < / i > :< / span > < / td >
< td > a char array< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > size< / tt > < / i > :< / span > < / td >
< td > the size in bytes of the chunk< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > terminate< / tt > < / i > :< / span > < / td >
< td > last chunk indicator< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > zero if no error, the < a href = "libxml2-xmlerror.html#xmlParserErrors" > xmlParserErrors< / a > otherwise.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParseDoc" > < / a > htmlParseDoc ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlParseDoc (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * cur, < br > const char * encoding)< br >
< / pre >
< p > parse an HTML in-memory document and build a tree.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > cur< / tt > < / i > :< / span > < / td >
< td > a pointer to an array of < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a >
< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > a free form C string describing the HTML document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParseDocument" > < / a > htmlParseDocument ()< / h3 >
< pre class = "programlisting" > int htmlParseDocument (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt)< br >
< / pre >
< p > parse an HTML document (and build a tree if using the standard SAX interface).< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > 0, or -1 in case of error. The parser context is augmented as a result of the parsing.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParseElement" > < / a > htmlParseElement ()< / h3 >
< pre class = "programlisting" > void htmlParseElement (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt)< br >
< / pre >
< p > parse an HTML element. This is highly recursive; it is kept for compatibility with previous code versions. [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody > < tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr > < / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParseEntityRef" > < / a > htmlParseEntityRef ()< / h3 >
< pre class = "programlisting" > const < a href = "libxml2-HTMLparser.html#htmlEntityDesc" > htmlEntityDesc< / a > * htmlParseEntityRef (< a href = "libxml2-HTMLparser.html#htmlParserCtxtPtr" > htmlParserCtxtPtr< / a > ctxt, < br > const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > ** str)< br >
< / pre >
< p > parse an HTML ENTITY reference [68] EntityRef ::= '&amp;' Name ';'< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ctxt< / tt > < / i > :< / span > < / td >
< td > an HTML parser context< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > str< / tt > < / i > :< / span > < / td >
< td > location to store the entity name< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the associated < a href = "libxml2-HTMLparser.html#htmlEntityDescPtr" > htmlEntityDescPtr< / a > if found, or NULL otherwise. If non-NULL, *str will have to be freed by the caller.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlParseFile" > < / a > htmlParseFile ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlParseFile (const char * filename, < br > const char * encoding)< br >
< / pre >
< p > parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > filename< / tt > < / i > :< / span > < / td >
< td > the filename< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > a free form C string describing the HTML document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlReadDoc" > < / a > htmlReadDoc ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlReadDoc (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * cur, < br > const char * URL, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML in-memory document and build a tree.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > cur< / tt > < / i > :< / span > < / td >
< td > a pointer to a zero terminated string< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > URL< / tt > < / i > :< / span > < / td >
< td > the base URL to use for the document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlReadFd" > < / a > htmlReadFd ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlReadFd (int fd, < br > const char * URL, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML document from a file descriptor and build a tree. NOTE that the file descriptor will not be closed when the reader is closed or reset.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > fd< / tt > < / i > :< / span > < / td >
< td > an open file descriptor< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > URL< / tt > < / i > :< / span > < / td >
< td > the base URL to use for the document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlReadFile" > < / a > htmlReadFile ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlReadFile (const char * filename, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML file from the filesystem or the network.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > filename< / tt > < / i > :< / span > < / td >
< td > a file or URL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlReadIO" > < / a > htmlReadIO ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlReadIO (< a href = "libxml2-xmlIO.html#xmlInputReadCallback" > xmlInputReadCallback< / a > ioread, < br > < a href = "libxml2-xmlIO.html#xmlInputCloseCallback" > xmlInputCloseCallback< / a > ioclose, < br > void * ioctx, < br > const char * URL, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML document from I/O functions and source and build a tree.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > ioread< / tt > < / i > :< / span > < / td >
< td > an I/O read function< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > ioclose< / tt > < / i > :< / span > < / td >
< td > an I/O close function< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > ioctx< / tt > < / i > :< / span > < / td >
< td > an I/O handler< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > URL< / tt > < / i > :< / span > < / td >
< td > the base URL to use for the document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlReadMemory" > < / a > htmlReadMemory ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlReadMemory (const char * buffer, < br > int size, < br > const char * URL, < br > const char * encoding, < br > int options)< br >
< / pre >
< p > parse an HTML in-memory document and build a tree.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > buffer< / tt > < / i > :< / span > < / td >
< td > a pointer to a char array< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > size< / tt > < / i > :< / span > < / td >
< td > the size of the array< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > URL< / tt > < / i > :< / span > < / td >
< td > the base URL to use for the document< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > the document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > options< / tt > < / i > :< / span > < / td >
< td > a combination of htmlParserOption(s)< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlSAXParseDoc" > < / a > htmlSAXParseDoc ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlSAXParseDoc (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * cur, < br > const char * encoding, < br > < a href = "libxml2-HTMLparser.html#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > sax, < br > void * userData)< br >
< / pre >
< p > Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fall back to the default DOM behavior and return a tree.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > cur< / tt > < / i > :< / span > < / td >
< td > a pointer to an array of < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a >
< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > a free form C string describing the HTML document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > sax< / tt > < / i > :< / span > < / td >
< td > the SAX handler block< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > userData< / tt > < / i > :< / span > < / td >
< td > if using SAX, this pointer will be provided on callbacks.< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree when sax is NULL and the default DOM builder is used; no tree is returned if the document is not well formed.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlSAXParseFile" > < / a > htmlSAXParseFile ()< / h3 >
< pre class = "programlisting" > < a href = "libxml2-HTMLparser.html#htmlDocPtr" > htmlDocPtr< / a > htmlSAXParseFile (const char * filename, < br > const char * encoding, < br > < a href = "libxml2-HTMLparser.html#htmlSAXHandlerPtr" > htmlSAXHandlerPtr< / a > sax, < br > void * userData)< br >
< / pre >
< p > parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, fall back to the default DOM tree building routines.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > filename< / tt > < / i > :< / span > < / td >
< td > the filename< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > encoding< / tt > < / i > :< / span > < / td >
< td > a free form C string describing the HTML document encoding, or NULL< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > sax< / tt > < / i > :< / span > < / td >
< td > the SAX handler block< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > userData< / tt > < / i > :< / span > < / td >
< td > if using SAX, this pointer will be provided on callbacks.< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the resulting document tree when sax is NULL and the default DOM builder is used; no tree is returned if the document is not well formed.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< div class = "refsect2" lang = "en" >
< h3 >
< a name = "htmlTagLookup" > < / a > htmlTagLookup ()< / h3 >
< pre class = "programlisting" > const < a href = "libxml2-HTMLparser.html#htmlElemDesc" > htmlElemDesc< / a > * htmlTagLookup (const < a href = "libxml2-xmlstring.html#xmlChar" > xmlChar< / a > * tag)< br >
< / pre >
< p > Look up the HTML tag in the ElementTable.< / p >
< div class = "variablelist" > < table border = "0" >
< col align = "left" >
< tbody >
< tr >
< td > < span class = "term" > < i > < tt > tag< / tt > < / i > :< / span > < / td >
< td > The tag name in lowercase< / td >
< / tr >
< tr >
< td > < span class = "term" > < i > < tt > Returns< / tt > < / i > :< / span > < / td >
< td > the related < a href = "libxml2-HTMLparser.html#htmlElemDescPtr" > htmlElemDescPtr< / a > or NULL if not found.< / td >
< / tr >
< / tbody >
< / table > < / div >
< / div >
< hr >
< / div >
< / div >
< / body >
2005-09-12 13:10:09 +00:00
< / html >