2022-08-26 01:22:33 +02:00
|
|
|
#ifndef XML_PARSER_H_PRIVATE__
|
|
|
|
#define XML_PARSER_H_PRIVATE__
|
|
|
|
|
|
|
|
#include <libxml/parser.h>
|
|
|
|
#include <libxml/xmlversion.h>
|
|
|
|
|
2024-07-06 14:58:16 +02:00
|
|
|
/*
 * Out-of-range character value: 0x200000 is larger than the highest
 * Unicode code point (0x10FFFF), so it can never equal a valid character.
 * NOTE(review): presumably used as a sentinel for invalid input
 * characters -- confirm at the use sites.
 */
#define XML_INVALID_CHAR 0x200000
|
|
|
|
|
2024-07-01 16:01:24 +02:00
|
|
|
/*
 * Upper bound applied to URI lengths.
 * NOTE(review): unit (bytes vs. characters) and whether the bound is
 * inclusive are not visible in this header -- confirm at the use sites.
 */
#define XML_MAX_URI_LENGTH 2000
|
|
|
|
|
2022-08-26 01:22:33 +02:00
|
|
|
/**
 * XML_VCTXT_DTD_VALIDATED:
 *
 * Set after xmlValidateDtdFinal was called.
 *
 * Bit 0 of the validation context flags.
 */
#define XML_VCTXT_DTD_VALIDATED (1u << 0)
|
|
|
|
/**
 * XML_VCTXT_USE_PCTXT:
 *
 * Set if the validation context is part of a parser context.
 *
 * Bit 1 of the validation context flags.
 */
#define XML_VCTXT_USE_PCTXT (1u << 1)
|
|
|
|
|
2025-03-13 23:20:16 +01:00
|
|
|
/*
 * TODO: Rename the XML_INPUT_* macros below to avoid confusion with
 * xmlParserInputFlags
 */
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
/*
 * xmlParserInput flag (bit 0): set when xmlSwitchEncoding is called.
 * It tells the parser that an encoding was already chosen (user override,
 * BOM or other auto-detection); in that case the encoding declaration is
 * not used to switch the encoding.
 */
#define XML_INPUT_HAS_ENCODING (1u << 0)
|
|
|
|
/*
 * Three-bit field occupying bits 1-3 of the input flags.
 *
 * XML_INPUT_AUTO_ENCODING is the mask for the whole field. The other
 * XML_INPUT_AUTO_* macros are the mutually exclusive values stored in it;
 * they are NOT independent bits (e.g. UTF16BE == UTF8 | UTF16LE), so test
 * them with
 *     (flags & XML_INPUT_AUTO_ENCODING) == XML_INPUT_AUTO_UTF8
 * rather than with a plain bitwise AND.
 *
 * NOTE(review): presumably records which encoding auto-detection picked
 * for the input -- confirm against the code that sets these values.
 */
#define XML_INPUT_AUTO_ENCODING (7u << 1)
#define XML_INPUT_AUTO_UTF8     (1u << 1)
#define XML_INPUT_AUTO_UTF16LE  (2u << 1)
#define XML_INPUT_AUTO_UTF16BE  (3u << 1)
#define XML_INPUT_AUTO_OTHER    (4u << 1)
|
2023-08-16 19:43:02 +02:00
|
|
|
/*
 * Input flag, bit 4.
 * NOTE(review): judging by the name, marks an input whose encoding was
 * taken from the encoding declaration -- confirm where it is set.
 */
#define XML_INPUT_USES_ENC_DECL (1u << 4)
|
2023-08-09 16:59:36 +02:00
|
|
|
/*
 * Input flag, bit 5.
 * NOTE(review): judging by the name, records that an encoding error was
 * encountered on this input -- confirm where it is set.
 */
#define XML_INPUT_ENCODING_ERROR (1u << 5)
|
2023-12-26 03:13:05 +01:00
|
|
|
/*
 * Input flag, bit 6. Tested by PARSER_PROGRESSIVE() against
 * ctxt->input->flags.
 * NOTE(review): presumably marks input that is fed incrementally (push
 * parsing) -- confirm where it is set.
 */
#define XML_INPUT_PROGRESSIVE (1u << 6)
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
/*
 * PARSER_STOPPED:
 * Evaluates to 1 when ctxt->disableSAX is greater than 1, else 0.
 * @ctxt is dereferenced without a NULL check.
 * NOTE(review): a value of exactly 1 presumably means SAX callbacks are
 * disabled without the parser being stopped -- confirm.
 */
#define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1)
|
|
|
|
|
2023-12-26 03:13:05 +01:00
|
|
|
/*
 * PARSER_PROGRESSIVE:
 * Nonzero when the XML_INPUT_PROGRESSIVE bit is set in the flags of the
 * current input (ctxt->input->flags). The result is the masked flag
 * value, not a normalized 0/1.
 *
 * Both @ctxt and ctxt->input are dereferenced without a NULL check.
 */
#define PARSER_PROGRESSIVE(ctxt) \
    ((ctxt)->input->flags & XML_INPUT_PROGRESSIVE)
|
|
|
|
|
2023-12-26 02:10:35 +01:00
|
|
|
/*
 * PARSER_IN_PE:
 * True (1) when the current input belongs to an entity
 * (ctxt->input->entity != NULL) whose etype is either
 * XML_INTERNAL_PARAMETER_ENTITY or XML_EXTERNAL_PARAMETER_ENTITY;
 * otherwise 0.
 *
 * @ctxt is expanded several times, so pass an expression without side
 * effects. ctxt and ctxt->input are dereferenced without a NULL check.
 */
#define PARSER_IN_PE(ctxt) \
    (((ctxt)->input->entity != NULL) && \
     (((ctxt)->input->entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || \
      ((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
|
|
|
|
|
|
|
|
/*
 * PARSER_EXTERNAL:
 * True (1) when either
 *   - ctxt->inSubset == 2, or
 *   - the current input belongs to an entity (ctxt->input->entity != NULL)
 *     whose etype is XML_EXTERNAL_PARAMETER_ENTITY;
 * otherwise 0. ctxt->input is only examined when inSubset != 2.
 *
 * @ctxt is expanded several times, so pass an expression without side
 * effects.
 * NOTE(review): inSubset == 2 presumably denotes the external DTD
 * subset -- confirm against the xmlParserCtxt documentation.
 */
#define PARSER_EXTERNAL(ctxt) \
    (((ctxt)->inSubset == 2) || \
     (((ctxt)->input->entity != NULL) && \
      ((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
|
|
|
|
|
2024-12-19 21:05:49 +01:00
|
|
|
/**
 * INPUT_CHUNK:
 *
 * The parser tries to always have that amount of input ready.
 * One of the points is providing context when reporting errors.
 */
#define INPUT_CHUNK 250
|
|
|
|
|
2024-09-15 20:28:49 +02:00
|
|
|
/*
 * One bucket of the attribute hash table; it stores a single integer.
 * NOTE(review): 'index' presumably refers to a slot in the parser's
 * attribute array, and the meaning of negative values is not visible
 * here -- confirm against the code that fills the table.
 */
struct _xmlAttrHashBucket {
    int index;
};
|
|
|
|
|
2023-12-18 19:31:29 +01:00
|
|
|
XML_HIDDEN void
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
|
|
|
|
xmlParserErrors code, xmlErrorLevel level,
|
|
|
|
const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
|
|
|
int int1, const char *msg, va_list ap);
|
2023-12-10 17:50:22 +01:00
|
|
|
XML_HIDDEN void
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
|
|
|
|
xmlParserErrors code, xmlErrorLevel level,
|
|
|
|
const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
|
|
|
int int1, const char *msg, ...);
|
2022-11-27 02:09:27 +01:00
|
|
|
XML_HIDDEN void
|
2023-04-30 17:51:29 +02:00
|
|
|
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
/*
 * xmlWarningMsg:
 * @ctxt:   parser context
 * @error:  error code of the warning
 * @msg:    message format (parameter 3, as named by LIBXML_ATTR_FORMAT(3,0))
 * @str1:   first string argument for @msg
 * @str2:   second string argument for @msg
 *
 * NOTE(review): LIBXML_ATTR_FORMAT is defined elsewhere; presumably it maps
 * to a printf-style format attribute, with 0 meaning the arguments are
 * not checked -- confirm.
 */
XML_HIDDEN void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
              const char *msg, const xmlChar *str1, const xmlChar *str2);
|
2024-05-20 13:10:41 +02:00
|
|
|
XML_HIDDEN void
|
|
|
|
xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri);
|
2024-11-25 20:59:06 +01:00
|
|
|
XML_HIDDEN int
|
|
|
|
xmlCtxtIsCatastrophicError(xmlParserCtxtPtr ctxt);
|
2024-05-20 13:10:41 +02:00
|
|
|
|
2023-04-30 17:51:29 +02:00
|
|
|
XML_HIDDEN void
|
2023-03-14 14:42:36 +01:00
|
|
|
xmlHaltParser(xmlParserCtxtPtr ctxt);
|
2023-03-12 16:47:15 +01:00
|
|
|
XML_HIDDEN int
|
|
|
|
xmlParserGrow(xmlParserCtxtPtr ctxt);
|
2023-03-21 13:08:44 +01:00
|
|
|
XML_HIDDEN void
|
2023-03-13 17:51:13 +01:00
|
|
|
xmlParserShrink(xmlParserCtxtPtr ctxt);
|
2022-08-26 01:22:33 +02:00
|
|
|
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
XML_HIDDEN void
|
|
|
|
xmlDetectEncoding(xmlParserCtxtPtr ctxt);
|
|
|
|
XML_HIDDEN void
|
|
|
|
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding);
|
2024-02-26 15:14:28 +01:00
|
|
|
XML_HIDDEN const xmlChar *
|
|
|
|
xmlGetActualEncoding(xmlParserCtxtPtr ctxt);
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
|
2025-03-04 13:02:36 +01:00
|
|
|
XML_HIDDEN int
|
|
|
|
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value);
|
|
|
|
XML_HIDDEN xmlNodePtr
|
|
|
|
nodePop(xmlParserCtxtPtr ctxt);
|
|
|
|
|
2023-10-02 12:16:05 +02:00
|
|
|
/*
 * xmlParserNsCreate:
 *
 * Takes no arguments and returns a pointer to an xmlParserNsData.
 * NOTE(review): presumably allocates a fresh namespace database that is
 * released with xmlParserNsFree, returning NULL on failure -- confirm
 * against the definition.
 */
XML_HIDDEN xmlParserNsData *
xmlParserNsCreate(void);
|
|
|
|
XML_HIDDEN void
|
|
|
|
xmlParserNsFree(xmlParserNsData *nsdb);
|
2023-09-29 00:18:44 +02:00
|
|
|
/*
 * These functions allow SAX handlers to attach extra data to namespaces
 * efficiently and should be made public.
 */
|
|
|
|
XML_HIDDEN int
|
|
|
|
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
|
|
|
|
void *saxData);
|
|
|
|
XML_HIDDEN void *
|
|
|
|
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix);
|
|
|
|
|
2024-06-11 19:10:41 +02:00
|
|
|
XML_HIDDEN xmlParserInputPtr
|
|
|
|
xmlLoadResource(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
|
|
|
|
xmlResourceType type);
|
2024-01-23 00:47:44 +01:00
|
|
|
XML_HIDDEN xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromUrl(xmlParserCtxtPtr ctxt, const char *url,
|
2025-03-13 23:20:16 +01:00
|
|
|
const char *publicId, const char *encoding,
|
|
|
|
xmlParserInputFlags flags);
|
2024-01-23 00:47:44 +01:00
|
|
|
XML_HIDDEN xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromMemory(xmlParserCtxtPtr ctxt, const char *url,
|
|
|
|
const void *mem, size_t size,
|
2025-03-13 23:20:16 +01:00
|
|
|
const char *encoding,
|
|
|
|
xmlParserInputFlags flags);
|
2024-01-23 00:47:44 +01:00
|
|
|
XML_HIDDEN xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
|
2025-03-13 23:20:16 +01:00
|
|
|
const char *str, const char *encoding,
|
|
|
|
xmlParserInputFlags flags);
|
2024-01-23 00:47:44 +01:00
|
|
|
XML_HIDDEN xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromFd(xmlParserCtxtPtr ctxt, const char *filename, int fd,
|
2025-03-13 23:20:16 +01:00
|
|
|
const char *encoding, xmlParserInputFlags flags);
|
2024-01-23 00:47:44 +01:00
|
|
|
XML_HIDDEN xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromIO(xmlParserCtxtPtr ctxt, const char *url,
|
|
|
|
xmlInputReadCallback ioRead,
|
|
|
|
xmlInputCloseCallback ioClose,
|
|
|
|
void *ioCtxt,
|
2025-03-13 23:20:16 +01:00
|
|
|
const char *encoding, xmlParserInputFlags flags);
|
2023-12-27 18:33:30 +01:00
|
|
|
XML_HIDDEN xmlParserInputPtr
|
2024-07-06 22:14:21 +02:00
|
|
|
xmlNewPushInput(const char *url, const char *chunk, int size);
|
2023-12-27 18:33:30 +01:00
|
|
|
|
2023-12-30 02:50:34 +01:00
|
|
|
/*
 * xmlExpandEntitiesInAttValue:
 * @ctxt:       parser context
 * @str:        attribute value to process (not modified)
 * @normalize:  int switch
 *
 * Returns a non-const xmlChar string.
 * NOTE(review): presumably a newly allocated copy of @str with entity
 * references expanded, owned by the caller and NULL on failure;
 * @normalize presumably enables attribute-value normalization --
 * confirm against the definition.
 */
XML_HIDDEN xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
                            int normalize);
|
|
|
|
|
2025-03-10 02:18:51 +01:00
|
|
|
XML_HIDDEN void
|
|
|
|
xmlParserCheckEOF(xmlParserCtxtPtr ctxt, xmlParserErrors code);
|
|
|
|
|
2022-08-26 01:22:33 +02:00
|
|
|
#endif /* XML_PARSER_H_PRIVATE__ */
|