Nick Wellnhofer 37c6618be5 parser: Rework parsing of attribute and entity values
Don't use a separate function to handle "complex" attributes. Validate
UTF-8 byte sequences without decoding. This should improve performance
considerably when parsing multi-byte UTF-8 sequences.

Use a string buffer to avoid unnecessary allocations and copying when
expanding entities.

Normalize attribute values in a single pass while expanding entities.

Be more lenient in recovery mode.

If no entity substitution was requested, validate entities without
expanding. Fixes #596.

Also fixes #655.
2024-01-02 15:42:03 +01:00

95 lines
3.1 KiB
C

#ifndef XML_PARSER_H_PRIVATE__
#define XML_PARSER_H_PRIVATE__
#include <libxml/parser.h>
#include <libxml/xmlversion.h>
/*
 * Bit flags for a validation context. Each flag occupies a distinct bit,
 * so the values can be combined with bitwise OR and tested with bitwise AND.
 * NOTE(review): the field that stores these flags is not declared in this
 * header -- confirm against the validation context definition.
 */
/**
 * XML_VCTXT_DTD_VALIDATED:
 *
 * Set after xmlValidateDtdFinal was called.
 */
#define XML_VCTXT_DTD_VALIDATED (1u << 0)
/**
 * XML_VCTXT_USE_PCTXT:
 *
 * Set if the validation context is part of a parser context.
 */
#define XML_VCTXT_USE_PCTXT (1u << 1)
/*
 * Bit flags for a parser input. PARSER_PROGRESSIVE() tests
 * XML_INPUT_PROGRESSIVE against input->flags; the other values are
 * presumably stored in the same field -- TODO confirm.
 *
 * Bit layout, as given by the shift expressions:
 *   bit 0     XML_INPUT_HAS_ENCODING
 *   bits 1-3  XML_INPUT_AUTO_ENCODING, a 3-bit field (mask 7u << 1) that
 *             holds exactly one of the XML_INPUT_AUTO_* values below.
 *             These are enumerated values, not independent bits: compare
 *             (flags & XML_INPUT_AUTO_ENCODING) against a value rather
 *             than testing a single XML_INPUT_AUTO_* constant with '&'.
 *   bit 4     XML_INPUT_USES_ENC_DECL
 *   bit 5     XML_INPUT_ENCODING_ERROR
 *   bit 6     XML_INPUT_PROGRESSIVE
 *
 * NOTE(review): the per-flag meanings noted below are inferred from the
 * names only; confirm against the code that sets and reads them.
 */
/* Presumably: an encoding has been established for the input. */
#define XML_INPUT_HAS_ENCODING (1u << 0)
/* Mask covering the whole auto-detected-encoding field (bits 1-3). */
#define XML_INPUT_AUTO_ENCODING (7u << 1)
/* Field value 1: presumably auto-detected UTF-8. */
#define XML_INPUT_AUTO_UTF8 (1u << 1)
/* Field value 2: presumably auto-detected little-endian UTF-16. */
#define XML_INPUT_AUTO_UTF16LE (2u << 1)
/* Field value 3: presumably auto-detected big-endian UTF-16. */
#define XML_INPUT_AUTO_UTF16BE (3u << 1)
/* Field value 4: presumably some other auto-detected encoding. */
#define XML_INPUT_AUTO_OTHER (4u << 1)
/* Presumably: the encoding came from the XML encoding declaration. */
#define XML_INPUT_USES_ENC_DECL (1u << 4)
/* Presumably: an encoding error was encountered on this input. */
#define XML_INPUT_ENCODING_ERROR (1u << 5)
/* Tested by PARSER_PROGRESSIVE(); presumably marks a push-parser input. */
#define XML_INPUT_PROGRESSIVE (1u << 6)
/*
 * Predicates on a parser context.
 *
 * All of these are plain macros: the argument may be evaluated more than
 * once (PARSER_IN_PE and PARSER_EXTERNAL expand it several times), so do
 * not pass an expression with side effects. Except for PARSER_STOPPED,
 * they dereference ctxt->input without checking it for NULL.
 */
/**
 * PARSER_STOPPED:
 *
 * Non-zero when ctxt->disableSAX is greater than 1.
 * NOTE(review): presumably that value is what marks a halted parser (see
 * xmlHaltParser) -- confirm against the implementation.
 */
#define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1)
/**
 * PARSER_PROGRESSIVE:
 *
 * Non-zero when XML_INPUT_PROGRESSIVE is set in the flags of the current
 * input. The result is the masked flag value, not a normalized 0/1.
 */
#define PARSER_PROGRESSIVE(ctxt) \
((ctxt)->input->flags & XML_INPUT_PROGRESSIVE)
/**
 * PARSER_IN_PE:
 *
 * Non-zero when the current input is attached to an entity whose type is
 * XML_INTERNAL_PARAMETER_ENTITY or XML_EXTERNAL_PARAMETER_ENTITY.
 */
#define PARSER_IN_PE(ctxt) \
(((ctxt)->input->entity != NULL) && \
(((ctxt)->input->entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || \
((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
/**
 * PARSER_EXTERNAL:
 *
 * Non-zero when ctxt->inSubset equals 2, or when the current input is
 * attached to an entity of type XML_EXTERNAL_PARAMETER_ENTITY.
 * NOTE(review): inSubset == 2 presumably denotes the external DTD subset;
 * the meaning of that value is not defined in this header -- confirm.
 */
#define PARSER_EXTERNAL(ctxt) \
(((ctxt)->inSubset == 2) || \
(((ctxt)->input->entity != NULL) && \
((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
/**
 * xmlCtxtVErr:
 * @ctxt:  parser context
 * @node:  a node; presumably the one the message relates to (TODO confirm
 *         whether NULL is accepted)
 * @domain:  error domain
 * @code:  parser error code
 * @level:  error level
 * @str1:  first string argument
 * @str2:  second string argument
 * @str3:  third string argument
 * @int1:  integer argument
 * @msg:  message string; presumably a printf-style format consumed
 *        together with @ap (TODO confirm)
 * @ap:  variable argument list
 *
 * va_list form of xmlCtxtErr(): the leading parameters are identical and
 * only the trailing argument differs. Only the declaration is visible
 * here; the reporting behavior lives in the definition.
 */
XML_HIDDEN void
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
xmlParserErrors code, xmlErrorLevel level,
const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
int int1, const char *msg, va_list ap);
/**
 * xmlCtxtErr:
 * @ctxt:  parser context
 * @node:  a node; presumably the one the message relates to (TODO confirm
 *         whether NULL is accepted)
 * @domain:  error domain
 * @code:  parser error code
 * @level:  error level
 * @str1:  first string argument
 * @str2:  second string argument
 * @str3:  third string argument
 * @int1:  integer argument
 * @msg:  message string; presumably a printf-style format consumed
 *        together with the variadic arguments (TODO confirm)
 * @...:  variadic arguments
 *
 * Variadic form of xmlCtxtVErr(): the leading parameters are identical
 * and only the trailing argument differs. Only the declaration is visible
 * here; the reporting behavior lives in the definition.
 */
XML_HIDDEN void
xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
xmlParserErrors code, xmlErrorLevel level,
const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
int int1, const char *msg, ...);
/**
 * xmlFatalErr:
 * @ctxt:  parser context
 * @error:  parser error code
 * @info:  extra string passed along with the error (TODO confirm whether
 *         NULL is accepted)
 *
 * NOTE(review): presumably reports @error as a fatal parser error; the
 * behavior is not visible from this declaration -- confirm.
 */
XML_HIDDEN void
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
/**
 * xmlWarningMsg:
 * @ctxt:  parser context
 * @error:  parser error code
 * @msg:  message string; parameter 3, the one named by LIBXML_ATTR_FORMAT
 * @str1:  first string argument
 * @str2:  second string argument
 *
 * LIBXML_ATTR_FORMAT(3,0) marks parameter 3 (@msg) as a format string.
 * NOTE(review): assuming the macro maps to a compiler printf-format
 * attribute, the second argument 0 means there are no variadic arguments
 * to check against the format -- confirm against the macro definition.
 * NOTE(review): presumably reports a warning; the behavior is not visible
 * from this declaration.
 */
XML_HIDDEN void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar *str1, const xmlChar *str2);
/**
 * xmlHaltParser:
 * @ctxt:  parser context
 *
 * NOTE(review): presumably stops parsing on @ctxt, putting it in the
 * state that PARSER_STOPPED() detects; the behavior is not visible from
 * this declaration -- confirm.
 */
XML_HIDDEN void
xmlHaltParser(xmlParserCtxtPtr ctxt);
/**
 * xmlParserGrow:
 * @ctxt:  parser context
 *
 * NOTE(review): presumably makes more data available in the current
 * input buffer; confirm.
 *
 * Returns an int status. The meaning of the values is not visible from
 * this declaration -- TODO confirm.
 */
XML_HIDDEN int
xmlParserGrow(xmlParserCtxtPtr ctxt);
/**
 * xmlParserShrink:
 * @ctxt:  parser context
 *
 * NOTE(review): presumably discards already-consumed data from the
 * current input buffer; confirm. No status is returned.
 */
XML_HIDDEN void
xmlParserShrink(xmlParserCtxtPtr ctxt);
/**
 * xmlDetectEncoding:
 * @ctxt:  parser context
 *
 * NOTE(review): presumably auto-detects the encoding of the current
 * input (compare the XML_INPUT_AUTO_* flags); confirm. No status is
 * returned.
 */
XML_HIDDEN void
xmlDetectEncoding(xmlParserCtxtPtr ctxt);
/**
 * xmlSetDeclaredEncoding:
 * @ctxt:  parser context
 * @encoding:  encoding name; passed as a non-const pointer
 *
 * NOTE(review): presumably applies the encoding named in the XML
 * declaration. Because @encoding is not const, the callee may take
 * ownership of or modify the string -- confirm before passing a string
 * the caller still owns.
 */
XML_HIDDEN void
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding);
/**
 * xmlParserNsCreate:
 *
 * Takes no arguments.
 *
 * Returns a pointer to an xmlParserNsData.
 * NOTE(review): presumably a newly allocated object to be released with
 * xmlParserNsFree(), and NULL on allocation failure -- confirm.
 */
XML_HIDDEN xmlParserNsData *
xmlParserNsCreate(void);
/**
 * xmlParserNsFree:
 * @nsdb:  namespace data, as returned by xmlParserNsCreate()
 *
 * NOTE(review): presumably releases @nsdb; whether NULL is accepted is
 * not visible from this declaration -- confirm.
 */
XML_HIDDEN void
xmlParserNsFree(xmlParserNsData *nsdb);
/*
 * These functions allow SAX handlers to attach extra data to namespaces
 * efficiently and should be made public.
 *
 * Both are currently declared XML_HIDDEN.
 */
/**
 * xmlParserNsUpdateSax:
 * @ctxt:  parser context
 * @prefix:  namespace prefix (TODO confirm how the default namespace is
 *           designated, e.g. NULL)
 * @saxData:  opaque pointer to associate with the namespace
 *
 * Returns an int status. The meaning of the values is not visible from
 * this declaration -- TODO confirm.
 */
XML_HIDDEN int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
void *saxData);
/**
 * xmlParserNsLookupSax:
 * @ctxt:  parser context
 * @prefix:  namespace prefix (TODO confirm how the default namespace is
 *           designated, e.g. NULL)
 *
 * Returns an opaque pointer; presumably the value previously stored with
 * xmlParserNsUpdateSax() for @prefix, or NULL if there is none -- confirm.
 */
XML_HIDDEN void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix);
/**
 * xmlNewInputPush:
 * @ctxt:  parser context
 * @url:  URL string (TODO confirm whether NULL is accepted)
 * @chunk:  pointer to initial data
 * @size:  presumably the number of bytes available at @chunk (TODO confirm)
 * @encoding:  encoding name (TODO confirm whether NULL is accepted)
 *
 * NOTE(review): presumably creates an input for push (progressive)
 * parsing; confirm.
 *
 * Returns a parser input pointer. Ownership and the failure value are not
 * visible from this declaration -- TODO confirm.
 */
XML_HIDDEN xmlParserInputPtr
xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url,
const char *chunk, int size, const char *encoding);
/**
 * xmlExpandEntitiesInAttValue:
 * @ctxt:  parser context
 * @str:  input string; not modified through this pointer (const)
 * @normalize:  int flag; presumably non-zero requests attribute-value
 *              normalization during expansion (TODO confirm)
 *
 * NOTE(review): presumably expands entity references found in an
 * attribute value; confirm.
 *
 * Returns an xmlChar pointer. Presumably a newly allocated string owned
 * by the caller, and NULL on error -- confirm.
 */
XML_HIDDEN xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
int normalize);
#endif /* XML_PARSER_H_PRIVATE__ */