2001-02-23 17:55:21 +00:00
|
|
|
/*
|
2001-12-31 16:16:02 +00:00
|
|
|
* parserInternals.c : Internal routines (and obsolete ones) needed for the
|
|
|
|
* XML and HTML parsers.
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
|
|
|
* See Copyright for the status of this software.
|
|
|
|
*
|
2001-06-24 12:13:24 +00:00
|
|
|
* daniel@veillard.com
|
2001-02-23 17:55:21 +00:00
|
|
|
*/
|
|
|
|
|
2002-03-18 19:37:11 +00:00
|
|
|
#define IN_LIBXML
|
2001-04-21 16:57:29 +00:00
|
|
|
#include "libxml.h"
|
|
|
|
|
2022-02-28 22:42:10 +01:00
|
|
|
#if defined(_WIN32)
|
2001-02-23 17:55:21 +00:00
|
|
|
#define XML_DIR_SEP '\\'
|
|
|
|
#else
|
|
|
|
#define XML_DIR_SEP '/'
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <stdlib.h>
|
2022-03-02 00:29:17 +01:00
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
#include <libxml/xmlmemory.h>
|
|
|
|
#include <libxml/tree.h>
|
|
|
|
#include <libxml/parser.h>
|
|
|
|
#include <libxml/parserInternals.h>
|
|
|
|
#include <libxml/entities.h>
|
|
|
|
#include <libxml/xmlerror.h>
|
|
|
|
#include <libxml/encoding.h>
|
|
|
|
#include <libxml/xmlIO.h>
|
|
|
|
#include <libxml/uri.h>
|
2003-08-18 12:15:38 +00:00
|
|
|
#include <libxml/dict.h>
|
2023-09-20 18:54:39 +02:00
|
|
|
#include <libxml/xmlsave.h>
|
2001-08-22 14:29:45 +00:00
|
|
|
#ifdef LIBXML_CATALOG_ENABLED
|
|
|
|
#include <libxml/catalog.h>
|
|
|
|
#endif
|
2003-10-11 15:22:13 +00:00
|
|
|
#include <libxml/chvalid.h>
|
2023-12-23 00:35:30 +01:00
|
|
|
#include <libxml/nanohttp.h>
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2016-03-01 15:18:04 -08:00
|
|
|
#define CUR(ctxt) ctxt->input->cur
|
|
|
|
#define END(ctxt) ctxt->input->end
|
|
|
|
|
2022-08-26 01:22:33 +02:00
|
|
|
#include "private/buf.h"
|
|
|
|
#include "private/enc.h"
|
|
|
|
#include "private/error.h"
|
|
|
|
#include "private/io.h"
|
2024-12-15 23:36:04 +01:00
|
|
|
#include "private/memory.h"
|
2022-08-26 01:22:33 +02:00
|
|
|
#include "private/parser.h"
|
2012-07-16 14:19:49 +08:00
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
#define XML_MAX_ERRORS 100
|
|
|
|
|
2023-08-20 20:48:10 +02:00
|
|
|
/*
|
|
|
|
* XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
|
|
|
|
* factor of serialized output after entity expansion.
|
|
|
|
*/
|
|
|
|
#define XML_MAX_AMPLIFICATION_DEFAULT 5
|
|
|
|
|
2001-07-25 17:18:57 +00:00
|
|
|
/*
|
|
|
|
* Various global defaults for parsing
|
|
|
|
*/
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2001-07-18 19:30:27 +00:00
|
|
|
/**
|
2001-02-23 17:55:21 +00:00
|
|
|
* xmlCheckVersion:
|
|
|
|
* @version: the include version number
|
|
|
|
*
|
|
|
|
* check the compiled lib version against the include one.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCheckVersion(int version) {
|
2022-09-01 01:18:30 +02:00
|
|
|
int myversion = LIBXML_VERSION;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2001-10-14 09:56:15 +00:00
|
|
|
xmlInitParser();
|
2001-05-07 20:50:47 +00:00
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
if ((myversion / 10000) != (version / 10000)) {
|
2024-07-15 14:35:47 +02:00
|
|
|
xmlPrintErrorMessage(
|
2001-11-20 08:35:07 +00:00
|
|
|
"Fatal: program compiled against libxml %d using libxml %d\n",
|
|
|
|
(version / 10000), (myversion / 10000));
|
2023-12-18 19:31:29 +01:00
|
|
|
} else if ((myversion / 100) < (version / 100)) {
|
2024-07-15 14:35:47 +02:00
|
|
|
xmlPrintErrorMessage(
|
2001-02-23 17:55:21 +00:00
|
|
|
"Warning: program compiled against libxml %d using older %d\n",
|
|
|
|
(version / 100), (myversion / 100));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-10-05 21:33:18 +00:00
|
|
|
|
|
|
|
/************************************************************************
|
|
|
|
* *
|
2012-09-11 13:26:36 +08:00
|
|
|
* Some factorized error routines *
|
2003-10-05 21:33:18 +00:00
|
|
|
* *
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2023-12-18 19:31:29 +01:00
|
|
|
* xmlCtxtSetErrorHandler:
|
2003-10-05 21:33:18 +00:00
|
|
|
* @ctxt: an XML parser context
|
2023-12-18 19:31:29 +01:00
|
|
|
* @handler: error handler
|
|
|
|
* @data: data for error handler
|
2003-10-05 21:33:18 +00:00
|
|
|
*
|
2023-12-21 17:30:38 +01:00
|
|
|
* Register a callback function that will be called on errors and
|
|
|
|
* warnings. If handler is NULL, the error handler will be deactivated.
|
|
|
|
*
|
|
|
|
* This is the recommended way to collect errors from the parser and
|
|
|
|
* takes precedence over all other error reporting mechanisms.
|
|
|
|
* These are (in order of precedence):
|
|
|
|
*
|
|
|
|
* - per-context structured handler (xmlCtxtSetErrorHandler)
|
|
|
|
* - per-context structured "serror" SAX handler
|
|
|
|
* - global structured handler (xmlSetStructuredErrorFunc)
|
|
|
|
* - per-context generic "error" and "warning" SAX handlers
|
|
|
|
* - global generic handler (xmlSetGenericErrorFunc)
|
|
|
|
* - print to stderr
|
|
|
|
*
|
|
|
|
* Available since 2.13.0.
|
2003-10-05 21:33:18 +00:00
|
|
|
*/
|
|
|
|
void
|
2023-12-18 19:31:29 +01:00
|
|
|
xmlCtxtSetErrorHandler(xmlParserCtxtPtr ctxt, xmlStructuredErrorFunc handler,
|
|
|
|
void *data)
|
2023-12-10 17:50:22 +01:00
|
|
|
{
|
2023-12-18 19:31:29 +01:00
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
|
|
|
ctxt->errorHandler = handler;
|
|
|
|
ctxt->errorCtxt = data;
|
|
|
|
}
|
2023-12-10 17:50:22 +01:00
|
|
|
|
2024-06-25 23:19:56 +02:00
|
|
|
/**
|
|
|
|
* xmlCtxtGetLastError:
|
|
|
|
* @ctx: an XML parser context
|
|
|
|
*
|
|
|
|
* Get the last parsing error registered.
|
|
|
|
*
|
|
|
|
* Returns NULL if no error occurred or a pointer to the error
|
|
|
|
*/
|
|
|
|
const xmlError *
|
|
|
|
xmlCtxtGetLastError(void *ctx)
|
|
|
|
{
|
|
|
|
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
|
|
|
|
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return (NULL);
|
|
|
|
if (ctxt->lastError.code == XML_ERR_OK)
|
|
|
|
return (NULL);
|
|
|
|
return (&ctxt->lastError);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlCtxtResetLastError:
|
|
|
|
* @ctx: an XML parser context
|
|
|
|
*
|
|
|
|
* Cleanup the last global error registered. For parsing error
|
|
|
|
* this does not change the well-formedness result.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCtxtResetLastError(void *ctx)
|
|
|
|
{
|
|
|
|
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
|
|
|
|
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
|
|
|
ctxt->errNo = XML_ERR_OK;
|
|
|
|
if (ctxt->lastError.code == XML_ERR_OK)
|
|
|
|
return;
|
|
|
|
xmlResetError(&ctxt->lastError);
|
|
|
|
}
|
|
|
|
|
2023-12-18 19:31:29 +01:00
|
|
|
/**
|
|
|
|
* xmlCtxtErrMemory:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
*
|
2024-05-20 13:58:22 +02:00
|
|
|
* Handle an out-of-memory error.
|
|
|
|
*
|
|
|
|
* Available since 2.13.0.
|
2023-12-18 19:31:29 +01:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCtxtErrMemory(xmlParserCtxtPtr ctxt)
|
|
|
|
{
|
|
|
|
xmlStructuredErrorFunc schannel = NULL;
|
|
|
|
xmlGenericErrorFunc channel = NULL;
|
|
|
|
void *data;
|
2023-12-10 17:50:22 +01:00
|
|
|
|
2024-12-13 16:45:38 +01:00
|
|
|
if (ctxt == NULL) {
|
|
|
|
xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
|
2024-06-12 13:32:32 +02:00
|
|
|
return;
|
2024-12-13 16:45:38 +01:00
|
|
|
}
|
2024-06-12 13:32:32 +02:00
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
ctxt->errNo = XML_ERR_NO_MEMORY;
|
|
|
|
ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
|
|
|
|
ctxt->wellFormed = 0;
|
|
|
|
ctxt->disableSAX = 2;
|
|
|
|
|
2023-12-18 19:31:29 +01:00
|
|
|
if (ctxt->errorHandler) {
|
|
|
|
schannel = ctxt->errorHandler;
|
|
|
|
data = ctxt->errorCtxt;
|
|
|
|
} else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
|
2023-12-10 17:50:22 +01:00
|
|
|
(ctxt->sax->serror != NULL)) {
|
2023-12-18 19:31:29 +01:00
|
|
|
schannel = ctxt->sax->serror;
|
|
|
|
data = ctxt->userData;
|
2023-12-10 17:50:22 +01:00
|
|
|
} else {
|
2023-12-18 19:31:29 +01:00
|
|
|
channel = ctxt->sax->error;
|
|
|
|
data = ctxt->userData;
|
2023-12-10 17:50:22 +01:00
|
|
|
}
|
2023-12-18 19:31:29 +01:00
|
|
|
|
|
|
|
xmlRaiseMemoryError(schannel, channel, data, XML_FROM_PARSER,
|
|
|
|
&ctxt->lastError);
|
2023-12-10 17:50:22 +01:00
|
|
|
}
|
|
|
|
|
2023-12-23 00:35:30 +01:00
|
|
|
/**
|
|
|
|
* xmlCtxtErrIO:
|
|
|
|
* @ctxt: parser context
|
|
|
|
* @code: xmlParserErrors code
|
|
|
|
* @uri: filename or URI (optional)
|
|
|
|
*
|
|
|
|
* If filename is empty, use the one from context input if available.
|
|
|
|
*
|
|
|
|
* Report an IO error to the parser context.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCtxtErrIO(xmlParserCtxtPtr ctxt, int code, const char *uri)
|
|
|
|
{
|
|
|
|
const char *errstr, *msg, *str1, *str2;
|
|
|
|
xmlErrorLevel level;
|
|
|
|
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
|
|
|
|
2024-06-14 19:42:40 +02:00
|
|
|
if (((code == XML_IO_ENOENT) ||
|
2024-01-22 21:02:16 +01:00
|
|
|
(code == XML_IO_UNKNOWN))) {
|
2024-07-04 15:15:17 +02:00
|
|
|
/*
|
|
|
|
* Only report a warning if a file could not be found. This should
|
|
|
|
* only be done for external entities, but the external entity loader
|
|
|
|
* of xsltproc can try multiple paths and assumes that ENOENT doesn't
|
|
|
|
* raise an error and aborts parsing.
|
|
|
|
*/
|
2023-12-23 00:35:30 +01:00
|
|
|
if (ctxt->validate == 0)
|
|
|
|
level = XML_ERR_WARNING;
|
|
|
|
else
|
|
|
|
level = XML_ERR_ERROR;
|
2024-07-04 15:15:17 +02:00
|
|
|
} else if (code == XML_IO_NETWORK_ATTEMPT) {
|
|
|
|
level = XML_ERR_ERROR;
|
2023-12-23 00:35:30 +01:00
|
|
|
} else {
|
|
|
|
level = XML_ERR_FATAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
errstr = xmlErrString(code);
|
|
|
|
|
|
|
|
if (uri == NULL) {
|
|
|
|
msg = "%s\n";
|
|
|
|
str1 = errstr;
|
|
|
|
str2 = NULL;
|
|
|
|
} else {
|
|
|
|
msg = "failed to load \"%s\": %s\n";
|
|
|
|
str1 = uri;
|
|
|
|
str2 = errstr;
|
|
|
|
}
|
|
|
|
|
|
|
|
xmlCtxtErr(ctxt, NULL, XML_FROM_IO, code, level,
|
|
|
|
(const xmlChar *) uri, NULL, NULL, 0,
|
|
|
|
msg, str1, str2);
|
|
|
|
}
|
|
|
|
|
2024-12-26 21:05:18 +01:00
|
|
|
/**
|
|
|
|
* xmlCtxtIsCatastrophicError:
|
|
|
|
* @ctxt: parser context
|
|
|
|
*
|
|
|
|
* Returns true if the last error is catastrophic.
|
|
|
|
*/
|
2024-11-25 20:59:06 +01:00
|
|
|
int
|
|
|
|
xmlCtxtIsCatastrophicError(xmlParserCtxtPtr ctxt) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return(1);
|
|
|
|
|
|
|
|
return(xmlIsCatastrophicError(ctxt->lastError.level,
|
|
|
|
ctxt->lastError.code));
|
|
|
|
}
|
|
|
|
|
2024-05-20 13:58:22 +02:00
|
|
|
/**
|
|
|
|
* xmlCtxtVErr:
|
|
|
|
* @ctxt: a parser context
|
|
|
|
* @node: the current node or NULL
|
|
|
|
* @domain: the domain for the error
|
|
|
|
* @code: the code for the error
|
|
|
|
* @level: the xmlErrorLevel for the error
|
|
|
|
* @str1: extra string info
|
|
|
|
* @str2: extra string info
|
|
|
|
* @str3: extra string info
|
|
|
|
* @int1: extra int info
|
|
|
|
* @msg: the message to display/transmit
|
|
|
|
* @ap: extra parameters for the message display
|
|
|
|
*
|
|
|
|
* Raise a parser error.
|
|
|
|
*/
|
2023-12-10 17:50:22 +01:00
|
|
|
void
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
|
|
|
|
xmlParserErrors code, xmlErrorLevel level,
|
|
|
|
const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
|
|
|
int int1, const char *msg, va_list ap)
|
2003-10-05 21:33:18 +00:00
|
|
|
{
|
2023-12-10 17:50:22 +01:00
|
|
|
xmlStructuredErrorFunc schannel = NULL;
|
2023-12-18 19:31:29 +01:00
|
|
|
xmlGenericErrorFunc channel = NULL;
|
2023-12-18 22:48:24 +01:00
|
|
|
void *data = NULL;
|
2023-12-10 17:50:22 +01:00
|
|
|
const char *file = NULL;
|
|
|
|
int line = 0;
|
|
|
|
int col = 0;
|
|
|
|
int res;
|
|
|
|
|
|
|
|
if (code == XML_ERR_NO_MEMORY) {
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtErrMemory(ctxt);
|
2023-12-10 17:50:22 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2024-12-13 16:45:38 +01:00
|
|
|
if (ctxt == NULL) {
|
|
|
|
res = xmlVRaiseError(NULL, NULL, NULL, NULL, node, domain, code,
|
|
|
|
level, NULL, 0, (const char *) str1,
|
|
|
|
(const char *) str2, (const char *) str3,
|
|
|
|
int1, 0, msg, ap);
|
|
|
|
if (res < 0)
|
|
|
|
xmlRaiseMemoryError(NULL, NULL, NULL, XML_FROM_PARSER, NULL);
|
|
|
|
|
2024-07-10 03:27:47 +02:00
|
|
|
return;
|
2024-12-13 16:45:38 +01:00
|
|
|
}
|
2024-07-10 03:27:47 +02:00
|
|
|
|
2024-02-13 12:22:28 +01:00
|
|
|
if (PARSER_STOPPED(ctxt))
|
|
|
|
return;
|
|
|
|
|
2024-11-25 20:59:06 +01:00
|
|
|
/* Don't overwrite catastrophic errors */
|
|
|
|
if (xmlCtxtIsCatastrophicError(ctxt))
|
|
|
|
return;
|
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
if (level == XML_ERR_WARNING) {
|
|
|
|
if (ctxt->nbWarnings >= XML_MAX_ERRORS)
|
2024-11-25 20:59:06 +01:00
|
|
|
return;
|
2023-12-10 17:50:22 +01:00
|
|
|
ctxt->nbWarnings += 1;
|
|
|
|
} else {
|
2024-08-05 15:14:21 +02:00
|
|
|
/* Report at least one fatal error. */
|
|
|
|
if ((ctxt->nbErrors >= XML_MAX_ERRORS) &&
|
2024-11-25 20:59:06 +01:00
|
|
|
((level < XML_ERR_FATAL) || (ctxt->wellFormed == 0)) &&
|
|
|
|
(!xmlIsCatastrophicError(level, code)))
|
|
|
|
return;
|
2023-12-10 17:50:22 +01:00
|
|
|
ctxt->nbErrors += 1;
|
|
|
|
}
|
|
|
|
|
2023-12-18 22:48:24 +01:00
|
|
|
if (((ctxt->options & XML_PARSE_NOERROR) == 0) &&
|
|
|
|
((level != XML_ERR_WARNING) ||
|
|
|
|
((ctxt->options & XML_PARSE_NOWARNING) == 0))) {
|
|
|
|
if (ctxt->errorHandler) {
|
|
|
|
schannel = ctxt->errorHandler;
|
|
|
|
data = ctxt->errorCtxt;
|
|
|
|
} else if ((ctxt->sax->initialized == XML_SAX2_MAGIC) &&
|
|
|
|
(ctxt->sax->serror != NULL)) {
|
|
|
|
schannel = ctxt->sax->serror;
|
|
|
|
data = ctxt->userData;
|
|
|
|
} else if ((domain == XML_FROM_VALID) || (domain == XML_FROM_DTD)) {
|
|
|
|
if (level == XML_ERR_WARNING)
|
|
|
|
channel = ctxt->vctxt.warning;
|
|
|
|
else
|
|
|
|
channel = ctxt->vctxt.error;
|
|
|
|
data = ctxt->vctxt.userData;
|
|
|
|
} else {
|
|
|
|
if (level == XML_ERR_WARNING)
|
|
|
|
channel = ctxt->sax->warning;
|
|
|
|
else
|
|
|
|
channel = ctxt->sax->error;
|
|
|
|
data = ctxt->userData;
|
|
|
|
}
|
2023-12-10 17:50:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ctxt->input != NULL) {
|
|
|
|
xmlParserInputPtr input = ctxt->input;
|
|
|
|
|
|
|
|
if ((input->filename == NULL) &&
|
|
|
|
(ctxt->inputNr > 1)) {
|
|
|
|
input = ctxt->inputTab[ctxt->inputNr - 2];
|
|
|
|
}
|
|
|
|
file = input->filename;
|
|
|
|
line = input->line;
|
|
|
|
col = input->col;
|
|
|
|
}
|
|
|
|
|
|
|
|
res = xmlVRaiseError(schannel, channel, data, ctxt, node, domain, code,
|
|
|
|
level, file, line, (const char *) str1,
|
|
|
|
(const char *) str2, (const char *) str3, int1, col,
|
|
|
|
msg, ap);
|
|
|
|
|
|
|
|
if (res < 0) {
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtErrMemory(ctxt);
|
2023-12-10 17:50:22 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (level >= XML_ERR_ERROR)
|
|
|
|
ctxt->errNo = code;
|
|
|
|
if (level == XML_ERR_FATAL) {
|
|
|
|
ctxt->wellFormed = 0;
|
2024-06-26 19:28:28 +02:00
|
|
|
|
|
|
|
if (xmlCtxtIsCatastrophicError(ctxt))
|
|
|
|
ctxt->disableSAX = 2; /* stop parser */
|
|
|
|
else if (ctxt->recovery == 0)
|
2023-12-10 17:50:22 +01:00
|
|
|
ctxt->disableSAX = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-20 13:58:22 +02:00
|
|
|
/**
|
|
|
|
* xmlCtxtErr:
|
|
|
|
* @ctxt: a parser context
|
|
|
|
* @node: the current node or NULL
|
|
|
|
* @domain: the domain for the error
|
|
|
|
* @code: the code for the error
|
|
|
|
* @level: the xmlErrorLevel for the error
|
|
|
|
* @str1: extra string info
|
|
|
|
* @str2: extra string info
|
|
|
|
* @str3: extra string info
|
|
|
|
* @int1: extra int info
|
|
|
|
* @msg: the message to display/transmit
|
|
|
|
* @...: extra parameters for the message display
|
|
|
|
*
|
|
|
|
* Raise a parser error.
|
|
|
|
*/
|
2023-12-10 17:50:22 +01:00
|
|
|
void
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
|
|
|
|
xmlParserErrors code, xmlErrorLevel level,
|
|
|
|
const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
|
|
|
int int1, const char *msg, ...)
|
2023-12-10 17:50:22 +01:00
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
va_start(ap, msg);
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtVErr(ctxt, node, domain, code, level,
|
|
|
|
str1, str2, str3, int1, msg, ap);
|
2023-12-10 17:50:22 +01:00
|
|
|
va_end(ap);
|
2003-10-05 21:33:18 +00:00
|
|
|
}
|
|
|
|
|
2024-06-26 19:28:28 +02:00
|
|
|
/**
|
|
|
|
* xmlCtxtGetStatus:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
*
|
|
|
|
* Get well-formedness and validation status after parsing. Also
|
|
|
|
* reports catastrophic errors which are not related to parsing
|
|
|
|
* like out-of-memory, I/O or other errors.
|
|
|
|
*
|
2024-06-27 16:23:14 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
2024-06-26 19:28:28 +02:00
|
|
|
* Returns a bitmask of XML_STATUS_* flags ORed together.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
xmlCtxtGetStatus(xmlParserCtxt *ctxt) {
|
|
|
|
int bits = 0;
|
|
|
|
|
|
|
|
if (xmlCtxtIsCatastrophicError(ctxt)) {
|
|
|
|
bits |= XML_STATUS_CATASTROPHIC_ERROR |
|
|
|
|
XML_STATUS_NOT_WELL_FORMED |
|
|
|
|
XML_STATUS_NOT_NS_WELL_FORMED;
|
|
|
|
if ((ctxt != NULL) && (ctxt->validate))
|
|
|
|
bits |= XML_STATUS_DTD_VALIDATION_FAILED;
|
|
|
|
|
|
|
|
return(bits);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!ctxt->wellFormed)
|
|
|
|
bits |= XML_STATUS_NOT_WELL_FORMED;
|
|
|
|
if (!ctxt->nsWellFormed)
|
|
|
|
bits |= XML_STATUS_NOT_NS_WELL_FORMED;
|
|
|
|
if ((ctxt->validate) && (!ctxt->valid))
|
|
|
|
bits |= XML_STATUS_DTD_VALIDATION_FAILED;
|
|
|
|
|
|
|
|
return(bits);
|
|
|
|
}
|
|
|
|
|
2023-04-30 17:51:29 +02:00
|
|
|
/**
|
|
|
|
* xmlFatalErr:
|
|
|
|
* @ctxt: an XML parser context
|
2024-05-20 13:58:22 +02:00
|
|
|
* @code: the error number
|
2023-09-21 22:57:33 +02:00
|
|
|
* @info: extra information string
|
2023-04-30 17:51:29 +02:00
|
|
|
*
|
|
|
|
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
|
|
|
|
*/
|
|
|
|
void
|
2023-12-29 18:47:30 +01:00
|
|
|
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors code, const char *info)
|
2023-04-30 17:51:29 +02:00
|
|
|
{
|
|
|
|
const char *errmsg;
|
|
|
|
|
2023-12-29 18:47:30 +01:00
|
|
|
errmsg = xmlErrString(code);
|
2023-12-10 17:50:22 +01:00
|
|
|
|
2023-04-30 17:51:29 +02:00
|
|
|
if (info == NULL) {
|
2024-09-02 18:37:41 +02:00
|
|
|
xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
|
2023-12-20 00:33:34 +01:00
|
|
|
NULL, NULL, NULL, 0, "%s\n", errmsg);
|
2023-04-30 17:51:29 +02:00
|
|
|
} else {
|
2024-09-02 18:37:41 +02:00
|
|
|
xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, code, XML_ERR_FATAL,
|
2023-12-20 00:33:34 +01:00
|
|
|
(const xmlChar *) info, NULL, NULL, 0,
|
|
|
|
"%s: %s\n", errmsg, info);
|
2023-04-30 17:51:29 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);

    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
|
|
|
|
|
|
|
|
/************************************************************************
|
|
|
|
* *
|
2012-07-16 14:19:49 +08:00
|
|
|
* Input handling functions for progressive parsing *
|
2001-02-23 17:55:21 +00:00
|
|
|
* *
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
/* we need to keep enough input to show errors in context */
|
|
|
|
#define LINE_LEN 80
|
|
|
|
|
2023-03-14 14:42:36 +01:00
|
|
|
/**
|
|
|
|
* xmlHaltParser:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
*
|
|
|
|
* Blocks further parser processing don't override error
|
|
|
|
* for internal use
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlHaltParser(xmlParserCtxtPtr ctxt) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
2023-12-10 17:50:22 +01:00
|
|
|
ctxt->instate = XML_PARSER_EOF; /* TODO: Remove after refactoring */
|
|
|
|
ctxt->disableSAX = 2;
|
2023-03-14 14:42:36 +01:00
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
|
|
|
* xmlParserInputRead:
|
|
|
|
* @in: an XML parser input
|
|
|
|
* @len: an indicative size for the lookahead
|
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: This function was internal and is deprecated.
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
2012-07-16 14:19:49 +08:00
|
|
|
* Returns -1 as this is an error to use it.
|
2001-02-23 17:55:21 +00:00
|
|
|
*/
|
|
|
|
int
|
2012-07-16 14:19:49 +08:00
|
|
|
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
|
|
|
|
return(-1);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
2023-03-12 16:47:15 +01:00
|
|
|
/**
|
|
|
|
* xmlParserGrow:
|
|
|
|
* @ctxt: an XML parser context
|
2023-09-21 22:57:33 +02:00
|
|
|
*
|
|
|
|
* Grow the input buffer.
|
|
|
|
*
|
|
|
|
* Returns the number of bytes read or -1 in case of error.
|
2023-03-12 16:47:15 +01:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
xmlParserGrow(xmlParserCtxtPtr ctxt) {
|
|
|
|
xmlParserInputPtr in = ctxt->input;
|
|
|
|
xmlParserInputBufferPtr buf = in->buf;
|
2024-01-02 17:52:43 +01:00
|
|
|
size_t curEnd = in->end - in->cur;
|
|
|
|
size_t curBase = in->cur - in->base;
|
|
|
|
size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
|
|
|
|
XML_MAX_HUGE_LENGTH :
|
|
|
|
XML_MAX_LOOKUP_LIMIT;
|
2023-03-12 16:47:15 +01:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (buf == NULL)
|
|
|
|
return(0);
|
2023-04-12 13:43:28 +02:00
|
|
|
/* Don't grow push parser buffer. */
|
2023-12-26 03:13:05 +01:00
|
|
|
if (PARSER_PROGRESSIVE(ctxt))
|
2023-04-12 13:43:28 +02:00
|
|
|
return(0);
|
2023-12-13 17:25:37 +01:00
|
|
|
/* Don't grow memory buffers. */
|
|
|
|
if ((buf->encoder == NULL) && (buf->readcallback == NULL))
|
|
|
|
return(0);
|
2023-06-07 14:05:34 +02:00
|
|
|
if (buf->error != 0)
|
|
|
|
return(-1);
|
2023-03-12 16:47:15 +01:00
|
|
|
|
2024-01-02 17:52:43 +01:00
|
|
|
if (curBase > maxLength) {
|
2023-12-10 17:50:22 +01:00
|
|
|
xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
|
|
|
|
"Buffer size limit exceeded, try XML_PARSE_HUGE\n");
|
2023-03-16 17:48:57 +01:00
|
|
|
xmlHaltParser(ctxt);
|
2023-03-12 16:47:15 +01:00
|
|
|
return(-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (curEnd >= INPUT_CHUNK)
|
|
|
|
return(0);
|
|
|
|
|
|
|
|
ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
|
2023-08-08 15:21:14 +02:00
|
|
|
xmlBufUpdateInput(buf->buffer, in, curBase);
|
2023-03-12 16:47:15 +01:00
|
|
|
|
2023-06-08 21:53:05 +02:00
|
|
|
if (ret < 0) {
|
2023-12-19 19:52:28 +01:00
|
|
|
xmlCtxtErrIO(ctxt, buf->error, NULL);
|
2023-06-08 21:53:05 +02:00
|
|
|
}
|
2023-03-12 16:47:15 +01:00
|
|
|
|
|
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
|
|
|
* xmlParserInputGrow:
|
|
|
|
* @in: an XML parser input
|
|
|
|
* @len: an indicative size for the lookahead
|
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: Don't use.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* This function increase the input for the parser. It tries to
|
|
|
|
* preserve pointers to the input buffer, and keep already read data
|
|
|
|
*
|
2012-07-16 14:19:49 +08:00
|
|
|
* Returns the amount of char read, or -1 in case of error, 0 indicate the
|
2001-02-23 17:55:21 +00:00
|
|
|
* end of this entity
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
xmlParserInputGrow(xmlParserInputPtr in, int len) {
|
2016-05-18 14:52:59 -07:00
|
|
|
int ret;
|
2012-07-16 14:19:49 +08:00
|
|
|
size_t indx;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2012-07-16 14:19:49 +08:00
|
|
|
if ((in == NULL) || (len < 0)) return(-1);
|
2001-02-23 17:55:21 +00:00
|
|
|
if (in->buf == NULL) return(-1);
|
|
|
|
if (in->base == NULL) return(-1);
|
|
|
|
if (in->cur == NULL) return(-1);
|
|
|
|
if (in->buf->buffer == NULL) return(-1);
|
|
|
|
|
2023-12-13 17:25:37 +01:00
|
|
|
/* Don't grow memory buffers. */
|
|
|
|
if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
|
|
|
|
return(0);
|
|
|
|
|
2001-03-24 17:00:36 +00:00
|
|
|
indx = in->cur - in->base;
|
2012-07-16 14:19:49 +08:00
|
|
|
if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
|
2001-02-23 17:55:21 +00:00
|
|
|
return(0);
|
|
|
|
}
|
2022-11-13 16:56:10 +01:00
|
|
|
ret = xmlParserInputBufferGrow(in->buf, len);
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2022-03-06 02:29:00 +01:00
|
|
|
in->base = xmlBufContent(in->buf->buffer);
|
2023-01-22 14:52:06 +01:00
|
|
|
if (in->base == NULL) {
|
|
|
|
in->base = BAD_CAST "";
|
|
|
|
in->cur = in->base;
|
|
|
|
in->end = in->base;
|
|
|
|
return(-1);
|
|
|
|
}
|
2022-03-06 02:29:00 +01:00
|
|
|
in->cur = in->base + indx;
|
2012-07-16 14:19:49 +08:00
|
|
|
in->end = xmlBufEnd(in->buf->buffer);
|
2001-02-23 17:55:21 +00:00
|
|
|
|
|
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
2023-03-13 17:51:13 +01:00
|
|
|
/**
|
|
|
|
* xmlParserShrink:
|
|
|
|
* @ctxt: an XML parser context
|
2023-09-21 22:57:33 +02:00
|
|
|
*
|
|
|
|
* Shrink the input buffer.
|
2023-03-13 17:51:13 +01:00
|
|
|
*/
|
2023-03-21 13:08:44 +01:00
|
|
|
void
|
2023-03-13 17:51:13 +01:00
|
|
|
xmlParserShrink(xmlParserCtxtPtr ctxt) {
|
|
|
|
xmlParserInputPtr in = ctxt->input;
|
|
|
|
xmlParserInputBufferPtr buf = in->buf;
|
2024-07-07 03:02:11 +02:00
|
|
|
size_t used, res;
|
2023-03-13 17:51:13 +01:00
|
|
|
|
2023-10-22 13:59:55 +02:00
|
|
|
if (buf == NULL)
|
2023-03-21 13:08:44 +01:00
|
|
|
return;
|
2023-03-13 17:51:13 +01:00
|
|
|
|
|
|
|
used = in->cur - in->base;
|
2024-07-07 03:02:11 +02:00
|
|
|
|
|
|
|
if (used > LINE_LEN) {
|
|
|
|
res = xmlBufShrink(buf->buffer, used - LINE_LEN);
|
|
|
|
|
|
|
|
if (res > 0) {
|
2023-03-13 17:51:13 +01:00
|
|
|
used -= res;
|
|
|
|
if ((res > ULONG_MAX) ||
|
|
|
|
(in->consumed > ULONG_MAX - (unsigned long)res))
|
|
|
|
in->consumed = ULONG_MAX;
|
|
|
|
else
|
|
|
|
in->consumed += res;
|
2024-07-07 03:02:11 +02:00
|
|
|
}
|
2023-03-13 17:51:13 +01:00
|
|
|
|
2024-07-07 03:02:11 +02:00
|
|
|
xmlBufUpdateInput(buf->buffer, in, used);
|
|
|
|
}
|
2023-03-13 17:51:13 +01:00
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
|
|
|
* xmlParserInputShrink:
|
|
|
|
* @in: an XML parser input
|
|
|
|
*
|
2023-03-13 19:19:46 +01:00
|
|
|
* DEPRECATED: Don't use.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* This function removes used input for the parser.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlParserInputShrink(xmlParserInputPtr in) {
|
2012-07-16 14:19:49 +08:00
|
|
|
size_t used;
|
|
|
|
size_t ret;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2004-11-08 14:02:18 +00:00
|
|
|
if (in == NULL) return;
|
2001-02-23 17:55:21 +00:00
|
|
|
if (in->buf == NULL) return;
|
|
|
|
if (in->base == NULL) return;
|
|
|
|
if (in->cur == NULL) return;
|
|
|
|
if (in->buf->buffer == NULL) return;
|
|
|
|
|
2022-03-06 02:29:00 +01:00
|
|
|
used = in->cur - in->base;
|
2024-07-07 03:02:11 +02:00
|
|
|
|
|
|
|
if (used > LINE_LEN) {
|
2012-07-16 14:19:49 +08:00
|
|
|
ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
|
2001-02-23 17:55:21 +00:00
|
|
|
if (ret > 0) {
|
2022-03-06 02:29:00 +01:00
|
|
|
used -= ret;
|
2022-11-13 20:19:13 +01:00
|
|
|
if ((ret > ULONG_MAX) ||
|
|
|
|
(in->consumed > ULONG_MAX - (unsigned long)ret))
|
|
|
|
in->consumed = ULONG_MAX;
|
|
|
|
else
|
|
|
|
in->consumed += ret;
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
2024-07-07 03:02:11 +02:00
|
|
|
xmlBufUpdateInput(in->buf->buffer, in, used);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/************************************************************************
|
|
|
|
* *
|
2012-09-11 13:26:36 +08:00
|
|
|
* UTF8 character input and related functions *
|
2001-02-23 17:55:21 +00:00
|
|
|
* *
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNextChar:
|
|
|
|
* @ctxt: the XML parser context
|
|
|
|
*
|
2023-03-13 19:38:41 +01:00
|
|
|
* DEPRECATED: Internal function, do not use.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* Skip to the next char input char.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
2003-03-22 00:04:05 +00:00
|
|
|
xmlNextChar(xmlParserCtxtPtr ctxt)
|
|
|
|
{
|
2023-08-09 16:59:36 +02:00
|
|
|
const unsigned char *cur;
|
|
|
|
size_t avail;
|
|
|
|
int c;
|
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
if ((ctxt == NULL) || (ctxt->input == NULL))
|
2003-03-22 00:04:05 +00:00
|
|
|
return;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
avail = ctxt->input->end - ctxt->input->cur;
|
2016-03-01 15:18:04 -08:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
if (avail < INPUT_CHUNK) {
|
2023-06-07 14:05:34 +02:00
|
|
|
xmlParserGrow(ctxt);
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt->input->cur >= ctxt->input->end)
|
2023-03-15 16:18:11 +01:00
|
|
|
return;
|
2023-08-09 16:59:36 +02:00
|
|
|
avail = ctxt->input->end - ctxt->input->cur;
|
2016-03-01 15:18:04 -08:00
|
|
|
}
|
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
cur = ctxt->input->cur;
|
|
|
|
c = *cur;
|
2023-09-25 14:35:43 +02:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
if (c < 0x80) {
|
|
|
|
if (c == '\n') {
|
|
|
|
ctxt->input->cur++;
|
2023-09-25 14:35:43 +02:00
|
|
|
ctxt->input->line++;
|
|
|
|
ctxt->input->col = 1;
|
2023-08-09 16:59:36 +02:00
|
|
|
} else if (c == '\r') {
|
|
|
|
/*
|
|
|
|
* 2.11 End-of-Line Handling
|
|
|
|
* the literal two-character sequence "#xD#xA" or a standalone
|
|
|
|
* literal #xD, an XML processor must pass to the application
|
|
|
|
* the single character #xA.
|
|
|
|
*/
|
|
|
|
ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
|
|
|
|
ctxt->input->line++;
|
|
|
|
ctxt->input->col = 1;
|
|
|
|
return;
|
2023-09-25 14:35:43 +02:00
|
|
|
} else {
|
2023-08-09 16:59:36 +02:00
|
|
|
ctxt->input->cur++;
|
2016-03-01 15:18:04 -08:00
|
|
|
ctxt->input->col++;
|
2023-09-25 14:35:43 +02:00
|
|
|
}
|
2023-08-09 16:59:36 +02:00
|
|
|
} else {
|
|
|
|
ctxt->input->col++;
|
2016-03-01 15:18:04 -08:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
|
|
|
|
goto encoding_error;
|
2023-03-15 16:18:11 +01:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
if (c < 0xe0) {
|
|
|
|
/* 2-byte code */
|
|
|
|
if (c < 0xc2)
|
|
|
|
goto encoding_error;
|
|
|
|
ctxt->input->cur += 2;
|
|
|
|
} else {
|
|
|
|
unsigned int val = (c << 8) | cur[1];
|
2023-03-15 16:18:11 +01:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
|
2016-03-01 15:18:04 -08:00
|
|
|
goto encoding_error;
|
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
if (c < 0xf0) {
|
|
|
|
/* 3-byte code */
|
|
|
|
if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
|
|
|
|
goto encoding_error;
|
|
|
|
ctxt->input->cur += 3;
|
|
|
|
} else {
|
|
|
|
if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
|
2003-03-22 00:04:05 +00:00
|
|
|
goto encoding_error;
|
2023-09-25 14:35:43 +02:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
/* 4-byte code */
|
|
|
|
if ((val < 0xf090) || (val >= 0xf490))
|
|
|
|
goto encoding_error;
|
|
|
|
ctxt->input->cur += 4;
|
2023-09-25 14:35:43 +02:00
|
|
|
}
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
2023-08-09 16:59:36 +02:00
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
return;
|
2023-08-09 16:59:36 +02:00
|
|
|
|
2003-10-05 13:51:35 +00:00
|
|
|
encoding_error:
|
2023-08-09 16:59:36 +02:00
|
|
|
/* Only report the first error */
|
|
|
|
if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
|
2023-12-19 20:47:36 +01:00
|
|
|
xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
|
2023-08-09 16:59:36 +02:00
|
|
|
ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
}
|
2002-03-20 21:55:57 +00:00
|
|
|
ctxt->input->cur++;
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlCurrentChar:
|
|
|
|
* @ctxt: the XML parser context
|
|
|
|
* @len: pointer to the length of the char read
|
|
|
|
*
|
2023-03-13 19:38:41 +01:00
|
|
|
* DEPRECATED: Internal function, do not use.
|
|
|
|
*
|
2001-12-31 16:16:02 +00:00
|
|
|
* The current char value, if using UTF-8 this may actually span multiple
|
2001-02-23 17:55:21 +00:00
|
|
|
* bytes in the input buffer. Implement the end of line normalization:
|
|
|
|
* 2.11 End-of-Line Handling
|
|
|
|
* Wherever an external parsed entity or the literal entity value
|
|
|
|
* of an internal parsed entity contains either the literal two-character
|
|
|
|
* sequence "#xD#xA" or a standalone literal #xD, an XML processor
|
|
|
|
* must pass to the application the single character #xA.
|
|
|
|
* This behavior can conveniently be produced by normalizing all
|
|
|
|
* line breaks to #xA on input, before parsing.)
|
|
|
|
*
|
2001-10-10 09:45:09 +00:00
|
|
|
* Returns the current char value and its length
|
2001-02-23 17:55:21 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
|
2023-08-09 16:59:36 +02:00
|
|
|
const unsigned char *cur;
|
|
|
|
size_t avail;
|
|
|
|
int c;
|
|
|
|
|
2004-11-08 14:02:18 +00:00
|
|
|
if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
avail = ctxt->input->end - ctxt->input->cur;
|
|
|
|
|
|
|
|
if (avail < INPUT_CHUNK) {
|
2023-06-07 14:05:34 +02:00
|
|
|
xmlParserGrow(ctxt);
|
2023-08-09 16:59:36 +02:00
|
|
|
avail = ctxt->input->end - ctxt->input->cur;
|
2023-06-07 14:05:34 +02:00
|
|
|
}
|
2023-03-15 16:18:11 +01:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
cur = ctxt->input->cur;
|
|
|
|
c = *cur;
|
|
|
|
|
|
|
|
if (c < 0x80) {
|
|
|
|
/* 1-byte code */
|
|
|
|
if (c < 0x20) {
|
|
|
|
/*
|
|
|
|
* 2.11 End-of-Line Handling
|
|
|
|
* the literal two-character sequence "#xD#xA" or a standalone
|
|
|
|
* literal #xD, an XML processor must pass to the application
|
|
|
|
* the single character #xA.
|
|
|
|
*/
|
|
|
|
if (c == '\r') {
|
2023-11-26 14:31:39 +01:00
|
|
|
/*
|
|
|
|
* TODO: This function shouldn't change the 'cur' pointer
|
|
|
|
* as side effect, but the NEXTL macro in parser.c relies
|
|
|
|
* on this behavior when incrementing line numbers.
|
|
|
|
*/
|
|
|
|
if (cur[1] == '\n')
|
|
|
|
ctxt->input->cur++;
|
|
|
|
*len = 1;
|
2023-08-09 16:59:36 +02:00
|
|
|
c = '\n';
|
|
|
|
} else if (c == 0) {
|
|
|
|
if (ctxt->input->cur >= ctxt->input->end) {
|
|
|
|
*len = 0;
|
|
|
|
} else {
|
|
|
|
*len = 1;
|
2023-10-22 15:56:46 +02:00
|
|
|
/*
|
|
|
|
* TODO: Null bytes should be handled by callers,
|
|
|
|
* but this can be tricky.
|
|
|
|
*/
|
2024-01-03 18:12:29 +01:00
|
|
|
xmlFatalErr(ctxt, XML_ERR_INVALID_CHAR,
|
|
|
|
"Char 0x0 out of allowed range\n");
|
2023-08-09 16:59:36 +02:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*len = 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*len = 1;
|
|
|
|
}
|
2023-03-15 16:18:11 +01:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
return(c);
|
|
|
|
} else {
|
|
|
|
int val;
|
2023-03-15 16:18:11 +01:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
if (avail < 2)
|
|
|
|
goto incomplete_sequence;
|
|
|
|
if ((cur[1] & 0xc0) != 0x80)
|
|
|
|
goto encoding_error;
|
2023-03-15 16:18:11 +01:00
|
|
|
|
2023-08-09 16:59:36 +02:00
|
|
|
if (c < 0xe0) {
|
|
|
|
/* 2-byte code */
|
|
|
|
if (c < 0xc2)
|
|
|
|
goto encoding_error;
|
|
|
|
val = (c & 0x1f) << 6;
|
|
|
|
val |= cur[1] & 0x3f;
|
|
|
|
*len = 2;
|
|
|
|
} else {
|
|
|
|
if (avail < 3)
|
2023-05-18 17:31:44 +02:00
|
|
|
goto incomplete_sequence;
|
2023-08-09 16:59:36 +02:00
|
|
|
if ((cur[2] & 0xc0) != 0x80)
|
|
|
|
goto encoding_error;
|
|
|
|
|
|
|
|
if (c < 0xf0) {
|
|
|
|
/* 3-byte code */
|
|
|
|
val = (c & 0xf) << 12;
|
|
|
|
val |= (cur[1] & 0x3f) << 6;
|
|
|
|
val |= cur[2] & 0x3f;
|
|
|
|
if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
|
|
|
|
goto encoding_error;
|
|
|
|
*len = 3;
|
|
|
|
} else {
|
|
|
|
if (avail < 4)
|
2023-05-18 17:31:44 +02:00
|
|
|
goto incomplete_sequence;
|
2023-08-09 16:59:36 +02:00
|
|
|
if ((cur[3] & 0xc0) != 0x80)
|
|
|
|
goto encoding_error;
|
|
|
|
|
|
|
|
/* 4-byte code */
|
|
|
|
val = (c & 0x0f) << 18;
|
|
|
|
val |= (cur[1] & 0x3f) << 12;
|
|
|
|
val |= (cur[2] & 0x3f) << 6;
|
|
|
|
val |= cur[3] & 0x3f;
|
|
|
|
if ((val < 0x10000) || (val >= 0x110000))
|
|
|
|
goto encoding_error;
|
|
|
|
*len = 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return(val);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
2017-08-30 14:16:01 +02:00
|
|
|
|
2023-05-18 17:31:44 +02:00
|
|
|
encoding_error:
|
2023-08-09 16:59:36 +02:00
|
|
|
/* Only report the first error */
|
|
|
|
if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
|
2023-12-19 20:47:36 +01:00
|
|
|
xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
|
2023-08-09 16:59:36 +02:00
|
|
|
ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
*len = 1;
|
2024-07-06 14:58:16 +02:00
|
|
|
return(XML_INVALID_CHAR);
|
2023-05-18 17:31:44 +02:00
|
|
|
|
|
|
|
incomplete_sequence:
|
|
|
|
/*
|
|
|
|
* An encoding problem may arise from a truncated input buffer
|
|
|
|
* splitting a character in the middle. In that case do not raise
|
|
|
|
* an error but return 0. This should only happen when push parsing
|
|
|
|
* char data.
|
|
|
|
*/
|
|
|
|
*len = 0;
|
|
|
|
return(0);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlStringCurrentChar:
|
|
|
|
* @ctxt: the XML parser context
|
|
|
|
* @cur: pointer to the beginning of the char
|
|
|
|
* @len: pointer to the length of the char read
|
|
|
|
*
|
2023-03-13 19:38:41 +01:00
|
|
|
* DEPRECATED: Internal function, do not use.
|
|
|
|
*
|
2001-12-31 16:16:02 +00:00
|
|
|
* The current char value, if using UTF-8 this may actually span multiple
|
2001-02-23 17:55:21 +00:00
|
|
|
* bytes in the input buffer.
|
|
|
|
*
|
2001-10-10 09:45:09 +00:00
|
|
|
* Returns the current char value and its length
|
2001-02-23 17:55:21 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
2023-09-22 15:45:20 +02:00
|
|
|
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
|
|
|
|
const xmlChar *cur, int *len) {
|
|
|
|
int c;
|
2002-01-13 15:43:22 +00:00
|
|
|
|
2023-09-22 15:45:20 +02:00
|
|
|
if ((cur == NULL) || (len == NULL))
|
|
|
|
return(0);
|
2017-08-30 14:16:01 +02:00
|
|
|
|
2023-09-22 15:45:20 +02:00
|
|
|
/* cur is zero-terminated, so we can lie about its length. */
|
|
|
|
*len = 4;
|
|
|
|
c = xmlGetUTF8Char(cur, len);
|
2004-11-09 14:59:59 +00:00
|
|
|
|
2023-09-22 15:45:20 +02:00
|
|
|
return((c < 0) ? 0 : c);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2001-03-24 17:00:36 +00:00
|
|
|
* xmlCopyCharMultiByte:
|
2001-12-31 16:16:02 +00:00
|
|
|
* @out: pointer to an array of xmlChar
|
2001-02-23 17:55:21 +00:00
|
|
|
* @val: the char value
|
|
|
|
*
|
2024-11-17 19:48:44 +01:00
|
|
|
* DEPRECATED: Internal function, don't use.
|
|
|
|
*
|
2012-09-11 13:26:36 +08:00
|
|
|
* append the char value in the array
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
|
|
|
* Returns the number of xmlChar written
|
|
|
|
*/
|
|
|
|
int
|
2001-03-24 17:00:36 +00:00
|
|
|
xmlCopyCharMultiByte(xmlChar *out, int val) {
|
2022-09-01 02:58:00 +02:00
|
|
|
if ((out == NULL) || (val < 0)) return(0);
|
2001-02-23 17:55:21 +00:00
|
|
|
/*
|
|
|
|
* We are supposed to handle UTF8, check it's valid
|
|
|
|
* From rfc2044: encoding of the Unicode values on UTF-8:
|
|
|
|
*
|
|
|
|
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
|
|
|
* 0000 0000-0000 007F 0xxxxxxx
|
|
|
|
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
|
2012-09-11 13:26:36 +08:00
|
|
|
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
2001-02-23 17:55:21 +00:00
|
|
|
*/
|
2001-03-24 17:00:36 +00:00
|
|
|
if (val >= 0x80) {
|
|
|
|
xmlChar *savedout = out;
|
|
|
|
int bits;
|
|
|
|
if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
|
|
|
|
else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
|
|
|
|
else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
|
|
|
|
else {
|
2024-01-03 18:11:44 +01:00
|
|
|
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
2024-07-15 14:35:47 +02:00
|
|
|
xmlAbort("xmlCopyCharMultiByte: codepoint out of range\n");
|
2024-01-03 18:11:44 +01:00
|
|
|
#endif
|
2001-02-23 17:55:21 +00:00
|
|
|
return(0);
|
|
|
|
}
|
2001-03-24 17:00:36 +00:00
|
|
|
for ( ; bits >= 0; bits-= 6)
|
|
|
|
*out++= ((val >> bits) & 0x3F) | 0x80 ;
|
|
|
|
return (out - savedout);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
2022-09-01 02:58:00 +02:00
|
|
|
*out = val;
|
2001-03-24 17:00:36 +00:00
|
|
|
return 1;
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2001-03-24 17:00:36 +00:00
|
|
|
/**
|
|
|
|
* xmlCopyChar:
|
|
|
|
* @len: Ignored, compatibility
|
2001-12-31 16:16:02 +00:00
|
|
|
* @out: pointer to an array of xmlChar
|
2001-03-24 17:00:36 +00:00
|
|
|
* @val: the char value
|
|
|
|
*
|
2024-07-02 04:02:16 +02:00
|
|
|
* DEPRECATED: Don't use.
|
|
|
|
*
|
2012-09-11 13:26:36 +08:00
|
|
|
* append the char value in the array
|
2001-03-24 17:00:36 +00:00
|
|
|
*
|
|
|
|
* Returns the number of xmlChar written
|
|
|
|
*/
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2001-03-24 17:00:36 +00:00
|
|
|
int
|
2001-03-26 16:28:29 +00:00
|
|
|
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
|
2022-09-01 02:58:00 +02:00
|
|
|
if ((out == NULL) || (val < 0)) return(0);
|
2001-03-24 17:00:36 +00:00
|
|
|
/* the len parameter is ignored */
|
|
|
|
if (val >= 0x80) {
|
|
|
|
return(xmlCopyCharMultiByte (out, val));
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
2022-09-01 02:58:00 +02:00
|
|
|
*out = val;
|
2001-03-24 17:00:36 +00:00
|
|
|
return 1;
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/************************************************************************
|
|
|
|
* *
|
|
|
|
* Commodity functions to switch encodings *
|
|
|
|
* *
|
|
|
|
************************************************************************/
|
|
|
|
|
2024-06-28 00:34:52 +02:00
|
|
|
/**
|
|
|
|
* xmlCtxtSetCharEncConvImpl:
|
|
|
|
* @ctxt: parser context
|
|
|
|
* @impl: callback
|
|
|
|
* @vctxt: user data
|
|
|
|
*
|
|
|
|
* Installs a custom implementation to convert between character
|
|
|
|
* encodings.
|
|
|
|
*
|
|
|
|
* This bypasses legacy feature like global encoding handlers or
|
|
|
|
* encoding aliases.
|
|
|
|
*
|
|
|
|
* Available since 2.14.0.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCtxtSetCharEncConvImpl(xmlParserCtxtPtr ctxt, xmlCharEncConvImpl impl,
|
|
|
|
void *vctxt) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ctxt->convImpl = impl;
|
|
|
|
ctxt->convCtxt = vctxt;
|
|
|
|
}
|
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
static int
|
2024-06-28 00:34:52 +02:00
|
|
|
xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
|
2023-03-21 19:07:12 +01:00
|
|
|
xmlChar out[200];
|
2024-06-28 00:34:52 +02:00
|
|
|
xmlParserInputPtr input = ctxt->input;
|
2023-03-21 19:07:12 +01:00
|
|
|
xmlCharEncodingHandlerPtr handler;
|
|
|
|
int inlen, outlen, res, i;
|
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
*hout = NULL;
|
|
|
|
|
2023-03-21 19:07:12 +01:00
|
|
|
/*
|
|
|
|
* To detect the EBCDIC code page, we convert the first 200 bytes
|
2024-06-28 00:34:52 +02:00
|
|
|
* to IBM037 (EBCDIC-US) and try to find the encoding declaration.
|
2023-03-21 19:07:12 +01:00
|
|
|
*/
|
2024-06-28 00:34:52 +02:00
|
|
|
res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
|
|
|
|
ctxt->convImpl, ctxt->convCtxt, &handler);
|
2023-12-10 17:50:22 +01:00
|
|
|
if (res != 0)
|
|
|
|
return(res);
|
2023-03-26 14:11:31 +02:00
|
|
|
outlen = sizeof(out) - 1;
|
2023-03-21 19:07:12 +01:00
|
|
|
inlen = input->end - input->cur;
|
2023-08-08 15:21:31 +02:00
|
|
|
res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
|
2023-12-10 17:50:22 +01:00
|
|
|
/*
|
|
|
|
* Return the EBCDIC handler if decoding failed. The error will
|
|
|
|
* be reported later.
|
|
|
|
*/
|
2023-03-21 19:07:12 +01:00
|
|
|
if (res < 0)
|
2023-12-10 17:50:22 +01:00
|
|
|
goto done;
|
2023-03-26 14:11:31 +02:00
|
|
|
out[outlen] = 0;
|
2023-03-21 19:07:12 +01:00
|
|
|
|
|
|
|
for (i = 0; i < outlen; i++) {
|
|
|
|
if (out[i] == '>')
|
|
|
|
break;
|
|
|
|
if ((out[i] == 'e') &&
|
|
|
|
(xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
|
|
|
|
int start, cur, quote;
|
|
|
|
|
|
|
|
i += 8;
|
|
|
|
while (IS_BLANK_CH(out[i]))
|
|
|
|
i += 1;
|
|
|
|
if (out[i++] != '=')
|
|
|
|
break;
|
|
|
|
while (IS_BLANK_CH(out[i]))
|
|
|
|
i += 1;
|
|
|
|
quote = out[i++];
|
|
|
|
if ((quote != '\'') && (quote != '"'))
|
|
|
|
break;
|
|
|
|
start = i;
|
|
|
|
cur = out[i];
|
|
|
|
while (((cur >= 'a') && (cur <= 'z')) ||
|
|
|
|
((cur >= 'A') && (cur <= 'Z')) ||
|
|
|
|
((cur >= '0') && (cur <= '9')) ||
|
|
|
|
(cur == '.') || (cur == '_') ||
|
|
|
|
(cur == '-'))
|
|
|
|
cur = out[++i];
|
|
|
|
if (cur != quote)
|
|
|
|
break;
|
|
|
|
out[i] = 0;
|
|
|
|
xmlCharEncCloseFunc(handler);
|
2024-06-28 00:34:52 +02:00
|
|
|
res = xmlCreateCharEncodingHandler((char *) out + start,
|
|
|
|
/* output */ 0, ctxt->convImpl, ctxt->convCtxt,
|
|
|
|
&handler);
|
2023-12-10 17:50:22 +01:00
|
|
|
if (res != 0)
|
|
|
|
return(res);
|
|
|
|
*hout = handler;
|
|
|
|
return(0);
|
2023-03-21 19:07:12 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
done:
|
2023-08-08 15:21:31 +02:00
|
|
|
/*
|
2023-12-10 17:50:22 +01:00
|
|
|
* Encoding handlers are stateful, so we have to recreate them.
|
2023-08-08 15:21:31 +02:00
|
|
|
*/
|
|
|
|
xmlCharEncCloseFunc(handler);
|
2024-06-28 00:34:52 +02:00
|
|
|
res = xmlCreateCharEncodingHandler("IBM037", /* output */ 0,
|
|
|
|
ctxt->convImpl, ctxt->convCtxt, &handler);
|
2023-12-10 17:50:22 +01:00
|
|
|
if (res != 0)
|
|
|
|
return(res);
|
|
|
|
*hout = handler;
|
|
|
|
return(0);
|
2023-03-21 19:07:12 +01:00
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
|
|
|
* xmlSwitchEncoding:
|
|
|
|
* @ctxt: the parser context
|
|
|
|
* @enc: the encoding value (number)
|
|
|
|
*
|
2023-12-10 17:50:22 +01:00
|
|
|
* Use encoding specified by enum to decode input data. This overrides
|
|
|
|
* the encoding found in the XML declaration.
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
*
|
2023-12-10 17:50:22 +01:00
|
|
|
* This function can also be used to override the encoding of chunks
|
|
|
|
* passed to xmlParseChunk.
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
|
|
|
* Returns 0 in case of success, -1 otherwise
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
|
|
|
{
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
xmlCharEncodingHandlerPtr handler = NULL;
|
2015-11-09 18:07:18 +08:00
|
|
|
int ret;
|
2023-12-10 17:50:22 +01:00
|
|
|
int res;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
if ((ctxt == NULL) || (ctxt->input == NULL))
|
|
|
|
return(-1);
|
2023-06-22 18:06:53 +02:00
|
|
|
|
2024-06-28 23:13:38 +02:00
|
|
|
res = xmlLookupCharEncodingHandler(enc, &handler);
|
2023-12-10 17:50:22 +01:00
|
|
|
if (res != 0) {
|
2024-06-28 23:13:38 +02:00
|
|
|
xmlFatalErr(ctxt, res, NULL);
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
return(-1);
|
2015-11-09 18:07:18 +08:00
|
|
|
}
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
|
2024-07-02 03:41:05 +02:00
|
|
|
ret = xmlSwitchToEncoding(ctxt, handler);
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
|
|
|
|
if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
|
|
|
|
ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
|
|
|
|
}
|
|
|
|
|
2015-11-09 18:07:18 +08:00
|
|
|
return(ret);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
/**
|
2024-06-11 16:19:58 +02:00
|
|
|
* xmlSwitchInputEncodingName:
|
2024-06-28 01:41:36 +02:00
|
|
|
* @ctxt: the parser context
|
2023-12-27 18:33:30 +01:00
|
|
|
* @input: the input strea,
|
2023-12-10 17:50:22 +01:00
|
|
|
* @encoding: the encoding name
|
|
|
|
*
|
|
|
|
* Returns 0 in case of success, -1 otherwise
|
|
|
|
*/
|
2023-12-27 18:33:30 +01:00
|
|
|
static int
|
|
|
|
xmlSwitchInputEncodingName(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
|
|
|
const char *encoding) {
|
2023-12-10 17:50:22 +01:00
|
|
|
xmlCharEncodingHandlerPtr handler;
|
|
|
|
int res;
|
|
|
|
|
2023-12-19 20:47:36 +01:00
|
|
|
if (encoding == NULL)
|
|
|
|
return(-1);
|
|
|
|
|
2024-06-28 00:34:52 +02:00
|
|
|
res = xmlCreateCharEncodingHandler(encoding, /* output */ 0,
|
|
|
|
ctxt->convImpl, ctxt->convCtxt, &handler);
|
2024-09-02 18:37:41 +02:00
|
|
|
if (res == XML_ERR_UNSUPPORTED_ENCODING) {
|
|
|
|
xmlWarningMsg(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
|
|
|
"Unsupported encoding: %s\n", BAD_CAST encoding, NULL);
|
|
|
|
return(-1);
|
|
|
|
} else if (res != XML_ERR_OK) {
|
2023-12-19 20:47:36 +01:00
|
|
|
xmlFatalErr(ctxt, res, encoding);
|
2023-12-10 17:50:22 +01:00
|
|
|
return(-1);
|
|
|
|
}
|
|
|
|
|
2024-07-02 03:41:05 +02:00
|
|
|
res = xmlInputSetEncodingHandler(input, handler);
|
|
|
|
if (res != XML_ERR_OK) {
|
|
|
|
xmlCtxtErrIO(ctxt, res, NULL);
|
|
|
|
return(-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(0);
|
2023-12-10 17:50:22 +01:00
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
2023-12-27 18:33:30 +01:00
|
|
|
* xmlSwitchEncodingName:
|
2001-02-23 17:55:21 +00:00
|
|
|
* @ctxt: the parser context
|
2023-12-27 18:33:30 +01:00
|
|
|
* @encoding: the encoding name
|
|
|
|
*
|
|
|
|
* Use specified encoding to decode input data. This overrides the
|
|
|
|
* encoding found in the XML declaration.
|
|
|
|
*
|
|
|
|
* This function can also be used to override the encoding of chunks
|
|
|
|
* passed to xmlParseChunk.
|
|
|
|
*
|
|
|
|
* Available since 2.13.0.
|
|
|
|
*
|
|
|
|
* Returns 0 in case of success, -1 otherwise
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
xmlSwitchEncodingName(xmlParserCtxtPtr ctxt, const char *encoding) {
|
2024-06-12 13:32:32 +02:00
|
|
|
if (ctxt == NULL)
|
|
|
|
return(-1);
|
|
|
|
|
2023-12-27 18:33:30 +01:00
|
|
|
return(xmlSwitchInputEncodingName(ctxt, ctxt->input, encoding));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-06-11 02:15:18 +02:00
|
|
|
* xmlInputSetEncodingHandler:
|
2003-10-19 13:35:37 +00:00
|
|
|
* @input: the input stream
|
2001-02-23 17:55:21 +00:00
|
|
|
* @handler: the encoding handler
|
|
|
|
*
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
* Use encoding handler to decode input data.
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* Closes the handler on error.
|
|
|
|
*
|
|
|
|
* Returns an xmlParserErrors code.
|
2001-02-23 17:55:21 +00:00
|
|
|
*/
|
2024-06-28 00:34:52 +02:00
|
|
|
int
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlInputSetEncodingHandler(xmlParserInputPtr input,
|
|
|
|
xmlCharEncodingHandlerPtr handler) {
|
2022-11-13 19:44:00 +01:00
|
|
|
xmlParserInputBufferPtr in;
|
2024-07-09 13:54:07 +02:00
|
|
|
xmlBufPtr buf;
|
2024-07-11 12:37:25 +02:00
|
|
|
int code = XML_ERR_OK;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
if ((input == NULL) || (input->buf == NULL)) {
|
2022-11-13 19:44:00 +01:00
|
|
|
xmlCharEncCloseFunc(handler);
|
2024-06-11 02:15:18 +02:00
|
|
|
return(XML_ERR_ARGUMENT);
|
2022-11-13 19:44:00 +01:00
|
|
|
}
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
in = input->buf;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
input->flags |= XML_INPUT_HAS_ENCODING;
|
|
|
|
|
2023-08-16 19:20:47 +02:00
|
|
|
/*
|
|
|
|
* UTF-8 requires no encoding handler.
|
|
|
|
*/
|
|
|
|
if ((handler != NULL) &&
|
|
|
|
(xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
|
|
|
|
xmlCharEncCloseFunc(handler);
|
|
|
|
handler = NULL;
|
|
|
|
}
|
|
|
|
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
if (in->encoder == handler)
|
2024-06-11 02:15:18 +02:00
|
|
|
return(XML_ERR_OK);
|
2023-04-13 15:11:47 +02:00
|
|
|
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
if (in->encoder != NULL) {
|
2022-11-13 19:44:00 +01:00
|
|
|
/*
|
2023-04-13 15:11:47 +02:00
|
|
|
* Switching encodings during parsing is a really bad idea,
|
2023-06-22 18:06:53 +02:00
|
|
|
* but Chromium can switch between ISO-8859-1 and UTF-16 before
|
|
|
|
* separate calls to xmlParseChunk.
|
2023-04-13 15:11:47 +02:00
|
|
|
*
|
|
|
|
* TODO: We should check whether the "raw" input buffer is empty and
|
|
|
|
* convert the old content using the old encoder.
|
2003-10-19 13:35:37 +00:00
|
|
|
*/
|
2023-04-13 15:11:47 +02:00
|
|
|
|
|
|
|
xmlCharEncCloseFunc(in->encoder);
|
|
|
|
in->encoder = handler;
|
2024-06-11 02:15:18 +02:00
|
|
|
return(XML_ERR_OK);
|
2022-11-13 19:44:00 +01:00
|
|
|
}
|
2023-03-21 19:07:12 +01:00
|
|
|
|
2024-07-07 18:52:17 +02:00
|
|
|
buf = xmlBufCreate(XML_IO_BUFFER_SIZE);
|
2024-07-10 22:31:15 +02:00
|
|
|
if (buf == NULL) {
|
|
|
|
xmlCharEncCloseFunc(handler);
|
2024-07-09 13:54:07 +02:00
|
|
|
return(XML_ERR_NO_MEMORY);
|
2024-07-10 22:31:15 +02:00
|
|
|
}
|
2024-07-09 13:54:07 +02:00
|
|
|
|
2022-11-13 19:44:00 +01:00
|
|
|
in->encoder = handler;
|
2024-07-09 13:54:07 +02:00
|
|
|
in->raw = in->buffer;
|
|
|
|
in->buffer = buf;
|
2003-10-19 13:35:37 +00:00
|
|
|
|
2022-11-13 19:44:00 +01:00
|
|
|
/*
|
|
|
|
* Is there already some content down the pipe to convert ?
|
|
|
|
*/
|
2024-07-11 12:37:25 +02:00
|
|
|
if (input->end > input->base) {
|
2023-08-08 15:19:51 +02:00
|
|
|
size_t processed;
|
2024-07-07 18:38:31 +02:00
|
|
|
size_t nbchars;
|
|
|
|
int res;
|
2022-11-13 19:44:00 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Shrink the current input buffer.
|
|
|
|
* Move it as the raw buffer and create a new input buffer
|
|
|
|
*/
|
|
|
|
processed = input->cur - input->base;
|
2024-07-09 13:54:07 +02:00
|
|
|
xmlBufShrink(in->raw, processed);
|
2022-11-20 19:55:12 +01:00
|
|
|
input->consumed += processed;
|
2022-11-13 19:44:00 +01:00
|
|
|
in->rawconsumed = processed;
|
|
|
|
|
2024-07-07 18:38:31 +02:00
|
|
|
nbchars = 4000 /* MINLEN */;
|
|
|
|
res = xmlCharEncInput(in, &nbchars);
|
|
|
|
if (res < 0)
|
2024-07-11 12:37:25 +02:00
|
|
|
code = in->error;
|
2024-06-11 02:15:18 +02:00
|
|
|
}
|
|
|
|
|
2024-07-11 12:37:25 +02:00
|
|
|
xmlBufResetInput(in->buffer, input);
|
|
|
|
|
|
|
|
return(code);
|
2024-06-11 02:15:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlSwitchInputEncoding:
|
|
|
|
* @ctxt: the parser context, only for error reporting
|
|
|
|
* @input: the input stream
|
|
|
|
* @handler: the encoding handler
|
|
|
|
*
|
|
|
|
* DEPRECATED: Internal function, don't use.
|
|
|
|
*
|
|
|
|
* Use encoding handler to decode input data.
|
|
|
|
*
|
|
|
|
* Returns 0 in case of success, -1 otherwise
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
|
|
|
xmlCharEncodingHandlerPtr handler) {
|
|
|
|
int code = xmlInputSetEncodingHandler(input, handler);
|
|
|
|
|
|
|
|
if (code != XML_ERR_OK) {
|
|
|
|
xmlCtxtErrIO(ctxt, code, NULL);
|
|
|
|
return(-1);
|
2003-10-19 13:35:37 +00:00
|
|
|
}
|
2024-06-11 02:15:18 +02:00
|
|
|
|
|
|
|
return(0);
|
2003-10-19 13:35:37 +00:00
|
|
|
}
|
|
|
|
|
2009-08-26 11:38:49 +02:00
|
|
|
/**
|
|
|
|
* xmlSwitchToEncoding:
|
|
|
|
* @ctxt: the parser context
|
|
|
|
* @handler: the encoding handler
|
|
|
|
*
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
* Use encoding handler to decode input data.
|
|
|
|
*
|
|
|
|
* This function can be used to enforce the encoding of chunks passed
|
|
|
|
* to xmlParseChunk.
|
2009-08-26 11:38:49 +02:00
|
|
|
*
|
|
|
|
* Returns 0 in case of success, -1 otherwise
|
|
|
|
*/
|
|
|
|
int
|
2012-09-11 13:26:36 +08:00
|
|
|
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
2009-08-26 11:38:49 +02:00
|
|
|
{
|
2024-07-02 03:41:05 +02:00
|
|
|
int code;
|
|
|
|
|
2022-03-06 23:23:43 +01:00
|
|
|
if (ctxt == NULL)
|
|
|
|
return(-1);
|
2024-07-02 03:41:05 +02:00
|
|
|
|
|
|
|
code = xmlInputSetEncodingHandler(ctxt->input, handler);
|
|
|
|
if (code != XML_ERR_OK) {
|
|
|
|
xmlCtxtErrIO(ctxt, code, NULL);
|
|
|
|
return(-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(0);
|
2009-08-26 11:38:49 +02:00
|
|
|
}
|
|
|
|
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
/**
|
|
|
|
* xmlDetectEncoding:
|
|
|
|
* @ctxt: the parser context
|
|
|
|
*
|
|
|
|
* Handle optional BOM, detect and switch to encoding.
|
|
|
|
*
|
|
|
|
* Assumes that there are at least four bytes in the input buffer.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
|
2023-08-09 18:37:20 +02:00
|
|
|
const xmlChar *in;
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
xmlCharEncoding enc;
|
|
|
|
int bomSize;
|
|
|
|
int autoFlag = 0;
|
|
|
|
|
|
|
|
if (xmlParserGrow(ctxt) < 0)
|
|
|
|
return;
|
2023-08-09 18:37:20 +02:00
|
|
|
in = ctxt->input->cur;
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
if (ctxt->input->end - in < 4)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
|
|
|
|
/*
|
|
|
|
* If the encoding was already set, only skip the BOM which was
|
|
|
|
* possibly decoded to UTF-8.
|
|
|
|
*/
|
|
|
|
if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
|
|
|
|
ctxt->input->cur += 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
enc = XML_CHAR_ENCODING_NONE;
|
|
|
|
bomSize = 0;
|
|
|
|
|
|
|
|
switch (in[0]) {
|
|
|
|
case 0x00:
|
|
|
|
if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
|
|
|
|
enc = XML_CHAR_ENCODING_UCS4BE;
|
|
|
|
autoFlag = XML_INPUT_AUTO_OTHER;
|
|
|
|
} else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
|
|
|
|
enc = XML_CHAR_ENCODING_UTF16BE;
|
|
|
|
autoFlag = XML_INPUT_AUTO_UTF16BE;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0x3C:
|
|
|
|
if (in[1] == 0x00) {
|
|
|
|
if ((in[2] == 0x00) && (in[3] == 0x00)) {
|
|
|
|
enc = XML_CHAR_ENCODING_UCS4LE;
|
|
|
|
autoFlag = XML_INPUT_AUTO_OTHER;
|
|
|
|
} else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
|
|
|
|
enc = XML_CHAR_ENCODING_UTF16LE;
|
|
|
|
autoFlag = XML_INPUT_AUTO_UTF16LE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0x4C:
|
|
|
|
if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
|
|
|
|
enc = XML_CHAR_ENCODING_EBCDIC;
|
|
|
|
autoFlag = XML_INPUT_AUTO_OTHER;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xEF:
|
|
|
|
if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
|
|
|
|
enc = XML_CHAR_ENCODING_UTF8;
|
|
|
|
autoFlag = XML_INPUT_AUTO_UTF8;
|
|
|
|
bomSize = 3;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xFE:
|
|
|
|
if (in[1] == 0xFF) {
|
|
|
|
enc = XML_CHAR_ENCODING_UTF16BE;
|
|
|
|
autoFlag = XML_INPUT_AUTO_UTF16BE;
|
|
|
|
bomSize = 2;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xFF:
|
|
|
|
if (in[1] == 0xFE) {
|
|
|
|
enc = XML_CHAR_ENCODING_UTF16LE;
|
|
|
|
autoFlag = XML_INPUT_AUTO_UTF16LE;
|
|
|
|
bomSize = 2;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bomSize > 0) {
|
|
|
|
ctxt->input->cur += bomSize;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (enc != XML_CHAR_ENCODING_NONE) {
|
|
|
|
ctxt->input->flags |= autoFlag;
|
2024-06-28 23:13:38 +02:00
|
|
|
|
|
|
|
if (enc == XML_CHAR_ENCODING_EBCDIC) {
|
|
|
|
xmlCharEncodingHandlerPtr handler;
|
|
|
|
int res;
|
|
|
|
|
|
|
|
res = xmlDetectEBCDIC(ctxt, &handler);
|
|
|
|
if (res != XML_ERR_OK) {
|
|
|
|
xmlFatalErr(ctxt, res, "detecting EBCDIC\n");
|
|
|
|
} else {
|
2024-07-02 03:41:05 +02:00
|
|
|
xmlSwitchToEncoding(ctxt, handler);
|
2024-06-28 23:13:38 +02:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
xmlSwitchEncoding(ctxt, enc);
|
|
|
|
}
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlSetDeclaredEncoding:
|
|
|
|
* @ctxt: the parser context
|
|
|
|
* @encoding: declared encoding
|
|
|
|
*
|
|
|
|
* Set the encoding from a declaration in the document.
|
|
|
|
*
|
|
|
|
* If no encoding was set yet, switch the encoding. Otherwise, only warn
|
|
|
|
* about encoding mismatches.
|
|
|
|
*
|
|
|
|
* Takes ownership of 'encoding'.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
|
|
|
|
if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
|
|
|
|
((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
|
2024-09-02 18:37:41 +02:00
|
|
|
xmlCharEncodingHandlerPtr handler;
|
|
|
|
int res;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* xmlSwitchEncodingName treats unsupported encodings as
|
|
|
|
* warnings, but we want it to be an error in an encoding
|
|
|
|
* declaration.
|
|
|
|
*/
|
|
|
|
res = xmlCreateCharEncodingHandler((const char *) encoding,
|
|
|
|
/* output */ 0, ctxt->convImpl, ctxt->convCtxt, &handler);
|
|
|
|
if (res != XML_ERR_OK) {
|
|
|
|
xmlFatalErr(ctxt, res, (const char *) encoding);
|
|
|
|
xmlFree(encoding);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
res = xmlInputSetEncodingHandler(ctxt->input, handler);
|
|
|
|
if (res != XML_ERR_OK) {
|
|
|
|
xmlCtxtErrIO(ctxt, res, NULL);
|
|
|
|
xmlFree(encoding);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2023-08-16 19:43:02 +02:00
|
|
|
ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
} else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
|
|
|
|
static const char *allowedUTF8[] = {
|
|
|
|
"UTF-8", "UTF8", NULL
|
|
|
|
};
|
|
|
|
static const char *allowedUTF16LE[] = {
|
|
|
|
"UTF-16", "UTF-16LE", "UTF16", NULL
|
|
|
|
};
|
|
|
|
static const char *allowedUTF16BE[] = {
|
|
|
|
"UTF-16", "UTF-16BE", "UTF16", NULL
|
|
|
|
};
|
|
|
|
const char **allowed = NULL;
|
|
|
|
const char *autoEnc = NULL;
|
|
|
|
|
|
|
|
switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
|
|
|
|
case XML_INPUT_AUTO_UTF8:
|
|
|
|
allowed = allowedUTF8;
|
|
|
|
autoEnc = "UTF-8";
|
|
|
|
break;
|
|
|
|
case XML_INPUT_AUTO_UTF16LE:
|
|
|
|
allowed = allowedUTF16LE;
|
|
|
|
autoEnc = "UTF-16LE";
|
|
|
|
break;
|
|
|
|
case XML_INPUT_AUTO_UTF16BE:
|
|
|
|
allowed = allowedUTF16BE;
|
|
|
|
autoEnc = "UTF-16BE";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (allowed != NULL) {
|
|
|
|
const char **p;
|
|
|
|
int match = 0;
|
|
|
|
|
|
|
|
for (p = allowed; *p != NULL; p++) {
|
|
|
|
if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
|
|
|
|
match = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (match == 0) {
|
|
|
|
xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
|
|
|
|
"Encoding '%s' doesn't match "
|
|
|
|
"auto-detected '%s'\n",
|
|
|
|
encoding, BAD_CAST autoEnc);
|
2023-12-10 17:50:22 +01:00
|
|
|
xmlFree(encoding);
|
|
|
|
encoding = xmlStrdup(BAD_CAST autoEnc);
|
|
|
|
if (encoding == NULL)
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtErrMemory(ctxt);
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-12-10 17:50:22 +01:00
|
|
|
|
|
|
|
if (ctxt->encoding != NULL)
|
|
|
|
xmlFree((xmlChar *) ctxt->encoding);
|
|
|
|
ctxt->encoding = encoding;
|
parser: Rework encoding detection
Introduce XML_INPUT_HAS_ENCODING flag for xmlParserInput which is set
when xmlSwitchEncoding is called. The parser can use the flag to
reliably detect whether an encoding was already set via user override,
BOM or other auto-detection. In this case, the encoding declaration
won't be used to switch the encoding.
Before, an inscrutable mix of ctxt->charset, ctxt->input->encoding
and ctxt->input->buf->encoder was used.
Introduce private helper functions to switch encodings used by both the
XML and HTML parser:
- xmlDetectEncoding which skips over the BOM, allowing to remove the
BOM checks from other encoding functions.
- xmlSetDeclaredEncoding, replacing htmlCheckEncodingDirect, which warns
about encoding mismatches.
If users override the encoding, store the declared instead of the actual
encoding in xmlDoc. In this case, the actual encoding is known and the
raw value from the doc is more useful.
Also use the input flags to store the ISO-8859-1 fallback state.
Restrict the fallback to cases where no encoding was specified. (The
fallback is only useful in recovery mode and these days broken UTF-8 is
probably more likely than ISO-8859-1, so it might eventually be removed
completely.)
The 'charset' member of xmlParserCtxt is now unused. The 'encoding'
member of xmlParserInput is now unused.
The 'standalone' member of xmlParserInput is renamed to 'flags'.
A new parser state XML_PARSER_XML_DECL is added for the push parser.
2023-08-08 15:19:46 +02:00
|
|
|
}
|
|
|
|
|
2024-06-26 02:22:04 +02:00
|
|
|
/**
|
|
|
|
* xmlCtxtGetDeclaredEncoding:
|
2024-12-26 21:05:18 +01:00
|
|
|
* @ctxt: parser context
|
2024-06-26 02:22:04 +02:00
|
|
|
*
|
2024-06-27 16:23:14 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
2024-06-26 02:22:04 +02:00
|
|
|
* Returns the encoding from the encoding declaration. This can differ
|
|
|
|
* from the actual encoding.
|
|
|
|
*/
|
|
|
|
const xmlChar *
|
|
|
|
xmlCtxtGetDeclaredEncoding(xmlParserCtxtPtr ctxt) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
return(ctxt->encoding);
|
|
|
|
}
|
|
|
|
|
2024-02-26 15:14:28 +01:00
|
|
|
/**
|
|
|
|
* xmlGetActualEncoding:
|
|
|
|
* @ctxt: the parser context
|
|
|
|
*
|
|
|
|
* Returns the actual used to parse the document. This can differ from
|
|
|
|
* the declared encoding.
|
|
|
|
*/
|
|
|
|
const xmlChar *
|
|
|
|
xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
|
|
|
|
const xmlChar *encoding = NULL;
|
|
|
|
|
|
|
|
if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
|
|
|
|
(ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
|
|
|
|
/* Preserve encoding exactly */
|
|
|
|
encoding = ctxt->encoding;
|
|
|
|
} else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
|
|
|
|
encoding = BAD_CAST ctxt->input->buf->encoder->name;
|
|
|
|
} else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
|
|
|
|
encoding = BAD_CAST "UTF-8";
|
|
|
|
}
|
|
|
|
|
|
|
|
return(encoding);
|
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/************************************************************************
|
|
|
|
* *
|
|
|
|
* Commodity functions to handle entities processing *
|
|
|
|
* *
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlFreeInputStream:
|
|
|
|
* @input: an xmlParserInputPtr
|
|
|
|
*
|
|
|
|
* Free up an input stream.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlFreeInputStream(xmlParserInputPtr input) {
|
|
|
|
if (input == NULL) return;
|
|
|
|
|
|
|
|
if (input->filename != NULL) xmlFree((char *) input->filename);
|
|
|
|
if (input->version != NULL) xmlFree((char *) input->version);
|
|
|
|
if ((input->free != NULL) && (input->base != NULL))
|
|
|
|
input->free((xmlChar *) input->base);
|
2012-09-11 13:26:36 +08:00
|
|
|
if (input->buf != NULL)
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlFreeParserInputBuffer(input->buf);
|
|
|
|
xmlFree(input);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNewInputStream:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
*
|
2024-07-06 22:14:21 +02:00
|
|
|
* DEPRECATED: Use xmlNewInputFromUrl or similar functions.
|
2024-07-02 02:18:03 +02:00
|
|
|
*
|
2012-05-15 11:18:40 +08:00
|
|
|
* Create a new input stream structure.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* Returns the new input stream or NULL
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
|
|
|
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
|
|
|
|
xmlParserInputPtr input;
|
|
|
|
|
|
|
|
input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
|
|
|
|
if (input == NULL) {
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtErrMemory(ctxt);
|
2001-02-23 17:55:21 +00:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
memset(input, 0, sizeof(xmlParserInput));
|
|
|
|
input->line = 1;
|
|
|
|
input->col = 1;
|
2012-05-15 11:18:40 +08:00
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
return(input);
|
|
|
|
}
|
|
|
|
|
2023-12-27 18:33:30 +01:00
|
|
|
/**
|
2024-07-06 22:04:06 +02:00
|
|
|
* xmlCtxtNewInputFromUrl:
|
2023-12-27 18:33:30 +01:00
|
|
|
* @ctxt: parser context
|
|
|
|
* @url: filename or URL
|
|
|
|
* @publicId: publid ID from doctype (optional)
|
|
|
|
* @encoding: character encoding (optional)
|
|
|
|
* @flags: unused, pass 0
|
|
|
|
*
|
|
|
|
* Creates a new parser input from the filesystem, the network or
|
|
|
|
* a user-defined resource loader.
|
|
|
|
*
|
|
|
|
* Returns a new parser input.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromUrl(xmlParserCtxtPtr ctxt, const char *url,
|
|
|
|
const char *publicId, const char *encoding,
|
|
|
|
int flags ATTRIBUTE_UNUSED) {
|
2023-12-27 18:33:30 +01:00
|
|
|
xmlParserInputPtr input;
|
|
|
|
|
|
|
|
if ((ctxt == NULL) || (url == NULL))
|
|
|
|
return(NULL);
|
|
|
|
|
2024-06-11 19:10:41 +02:00
|
|
|
input = xmlLoadResource(ctxt, url, publicId, XML_RESOURCE_MAIN_DOCUMENT);
|
2023-12-27 18:33:30 +01:00
|
|
|
if (input == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
if (encoding != NULL)
|
|
|
|
xmlSwitchInputEncodingName(ctxt, input, encoding);
|
|
|
|
|
|
|
|
return(input);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNewInputInternal:
|
|
|
|
* @buf: parser input buffer
|
|
|
|
* @filename: filename or URL
|
|
|
|
*
|
|
|
|
* Internal helper function.
|
|
|
|
*
|
|
|
|
* Returns a new parser input.
|
|
|
|
*/
|
|
|
|
static xmlParserInputPtr
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlNewInputInternal(xmlParserInputBufferPtr buf, const char *filename) {
|
2023-12-27 18:33:30 +01:00
|
|
|
xmlParserInputPtr input;
|
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
|
2023-12-27 18:33:30 +01:00
|
|
|
if (input == NULL) {
|
|
|
|
xmlFreeParserInputBuffer(buf);
|
|
|
|
return(NULL);
|
|
|
|
}
|
2024-06-11 02:15:18 +02:00
|
|
|
memset(input, 0, sizeof(xmlParserInput));
|
|
|
|
input->line = 1;
|
|
|
|
input->col = 1;
|
2023-12-27 18:33:30 +01:00
|
|
|
|
|
|
|
input->buf = buf;
|
|
|
|
xmlBufResetInput(input->buf->buffer, input);
|
|
|
|
|
|
|
|
if (filename != NULL) {
|
|
|
|
input->filename = xmlMemStrdup(filename);
|
|
|
|
if (input->filename == NULL) {
|
|
|
|
xmlFreeInputStream(input);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return(input);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:14:21 +02:00
|
|
|
* xmlNewInputFromMemory:
|
2023-12-27 18:33:30 +01:00
|
|
|
* @url: base URL (optional)
|
|
|
|
* @mem: pointer to char array
|
|
|
|
* @size: size of array
|
|
|
|
* @flags: optimization hints
|
|
|
|
*
|
|
|
|
* Creates a new parser input to read from a memory area.
|
|
|
|
*
|
|
|
|
* @url is used as base to resolve external entities and for
|
|
|
|
* error reporting.
|
|
|
|
*
|
|
|
|
* If the XML_INPUT_BUF_STATIC flag is set, the memory area must
|
|
|
|
* stay unchanged until parsing has finished. This can avoid
|
|
|
|
* temporary copies.
|
|
|
|
*
|
|
|
|
* If the XML_INPUT_BUF_ZERO_TERMINATED flag is set, the memory
|
|
|
|
* area must contain a zero byte after the buffer at position @size.
|
|
|
|
* This can avoid temporary copies.
|
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
|
|
|
* Returns a new parser input or NULL if a memory allocation failed.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:14:21 +02:00
|
|
|
xmlNewInputFromMemory(const char *url, const void *mem, size_t size,
|
|
|
|
int flags) {
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlParserInputBufferPtr buf;
|
|
|
|
|
|
|
|
if (mem == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
buf = xmlNewInputBufferMemory(mem, size, flags, XML_CHAR_ENCODING_NONE);
|
|
|
|
if (buf == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
return(xmlNewInputInternal(buf, url));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:04:06 +02:00
|
|
|
* xmlCtxtNewInputFromMemory:
|
2024-06-11 02:15:18 +02:00
|
|
|
* @ctxt: parser context
|
|
|
|
* @url: base URL (optional)
|
|
|
|
* @mem: pointer to char array
|
|
|
|
* @size: size of array
|
|
|
|
* @encoding: character encoding (optional)
|
|
|
|
* @flags: optimization hints
|
|
|
|
*
|
|
|
|
* Returns a new parser input or NULL in case of error.
|
2023-12-27 18:33:30 +01:00
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromMemory(xmlParserCtxtPtr ctxt, const char *url,
|
|
|
|
const void *mem, size_t size,
|
|
|
|
const char *encoding, int flags) {
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlParserInputPtr input;
|
2023-12-27 18:33:30 +01:00
|
|
|
|
|
|
|
if ((ctxt == NULL) || (mem == NULL))
|
|
|
|
return(NULL);
|
|
|
|
|
2024-07-06 22:14:21 +02:00
|
|
|
input = xmlNewInputFromMemory(url, mem, size, flags);
|
2024-06-11 02:15:18 +02:00
|
|
|
if (input == NULL) {
|
|
|
|
xmlCtxtErrMemory(ctxt);
|
2023-12-27 18:33:30 +01:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
if (encoding != NULL)
|
|
|
|
xmlSwitchInputEncodingName(ctxt, input, encoding);
|
|
|
|
|
|
|
|
return(input);
|
2023-12-27 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:14:21 +02:00
|
|
|
* xmlNewInputFromString:
|
2023-12-27 18:33:30 +01:00
|
|
|
* @url: base URL (optional)
|
|
|
|
* @str: zero-terminated string
|
|
|
|
* @flags: optimization hints
|
|
|
|
*
|
|
|
|
* Creates a new parser input to read from a zero-terminated string.
|
|
|
|
*
|
|
|
|
* @url is used as base to resolve external entities and for
|
|
|
|
* error reporting.
|
|
|
|
*
|
|
|
|
* If the XML_INPUT_BUF_STATIC flag is set, the string must
|
|
|
|
* stay unchanged until parsing has finished. This can avoid
|
|
|
|
* temporary copies.
|
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
|
|
|
* Returns a new parser input or NULL if a memory allocation failed.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:14:21 +02:00
|
|
|
xmlNewInputFromString(const char *url, const char *str, int flags) {
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlParserInputBufferPtr buf;
|
|
|
|
|
|
|
|
if (str == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
buf = xmlNewInputBufferString(str, flags);
|
|
|
|
if (buf == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
return(xmlNewInputInternal(buf, url));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:04:06 +02:00
|
|
|
* xmlCtxtNewInputFromString:
|
2024-06-11 02:15:18 +02:00
|
|
|
* @ctxt: parser context
|
|
|
|
* @url: base URL (optional)
|
|
|
|
* @str: zero-terminated string
|
|
|
|
* @encoding: character encoding (optional)
|
|
|
|
* @flags: optimization hints
|
|
|
|
*
|
2023-12-27 18:33:30 +01:00
|
|
|
* Returns a new parser input.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
|
|
|
|
const char *str, const char *encoding, int flags) {
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlParserInputPtr input;
|
2023-12-27 18:33:30 +01:00
|
|
|
|
|
|
|
if ((ctxt == NULL) || (str == NULL))
|
|
|
|
return(NULL);
|
|
|
|
|
2024-07-06 22:14:21 +02:00
|
|
|
input = xmlNewInputFromString(url, str, flags);
|
2024-06-11 02:15:18 +02:00
|
|
|
if (input == NULL) {
|
|
|
|
xmlCtxtErrMemory(ctxt);
|
2023-12-27 18:33:30 +01:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
if (encoding != NULL)
|
|
|
|
xmlSwitchInputEncodingName(ctxt, input, encoding);
|
|
|
|
|
|
|
|
return(input);
|
2023-12-27 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:14:21 +02:00
|
|
|
* xmlNewInputFromFd:
|
2023-12-27 18:33:30 +01:00
|
|
|
* @url: base URL (optional)
|
|
|
|
* @fd: file descriptor
|
|
|
|
* @flags: unused, pass 0
|
|
|
|
*
|
|
|
|
* Creates a new parser input to read from a zero-terminated string.
|
|
|
|
*
|
|
|
|
* @url is used as base to resolve external entities and for
|
|
|
|
* error reporting.
|
|
|
|
*
|
|
|
|
* @fd is closed after parsing has finished.
|
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
|
|
|
* Returns a new parser input or NULL if a memory allocation failed.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:14:21 +02:00
|
|
|
xmlNewInputFromFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) {
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlParserInputBufferPtr buf;
|
|
|
|
|
|
|
|
if (fd < 0)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
|
|
|
|
if (buf == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
return(xmlNewInputInternal(buf, url));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:04:06 +02:00
|
|
|
* xmlCtxtNewInputFromFd:
|
2024-06-11 02:15:18 +02:00
|
|
|
* @ctxt: parser context
|
|
|
|
* @url: base URL (optional)
|
|
|
|
* @fd: file descriptor
|
|
|
|
* @encoding: character encoding (optional)
|
|
|
|
* @flags: unused, pass 0
|
|
|
|
*
|
2023-12-27 18:33:30 +01:00
|
|
|
* Returns a new parser input.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromFd(xmlParserCtxtPtr ctxt, const char *url,
|
|
|
|
int fd, const char *encoding, int flags) {
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlParserInputPtr input;
|
2023-12-27 18:33:30 +01:00
|
|
|
|
|
|
|
if ((ctxt == NULL) || (fd < 0))
|
|
|
|
return(NULL);
|
|
|
|
|
2024-07-06 22:14:21 +02:00
|
|
|
input = xmlNewInputFromFd(url, fd, flags);
|
2024-06-11 02:15:18 +02:00
|
|
|
if (input == NULL) {
|
2023-12-27 18:33:30 +01:00
|
|
|
xmlCtxtErrMemory(ctxt);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
if (encoding != NULL)
|
|
|
|
xmlSwitchInputEncodingName(ctxt, input, encoding);
|
|
|
|
|
|
|
|
return(input);
|
2023-12-27 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:14:21 +02:00
|
|
|
* xmlNewInputFromIO:
|
2023-12-27 18:33:30 +01:00
|
|
|
* @url: base URL (optional)
|
|
|
|
* @ioRead: read callback
|
|
|
|
* @ioClose: close callback (optional)
|
|
|
|
* @ioCtxt: IO context
|
|
|
|
* @flags: unused, pass 0
|
|
|
|
*
|
|
|
|
* Creates a new parser input to read from input callbacks and
|
|
|
|
* cintext.
|
|
|
|
*
|
|
|
|
* @url is used as base to resolve external entities and for
|
|
|
|
* error reporting.
|
|
|
|
*
|
|
|
|
* @ioRead is called to read new data into a provided buffer.
|
|
|
|
* It must return the number of bytes written into the buffer
|
|
|
|
* ot a negative xmlParserErrors code on failure.
|
|
|
|
*
|
|
|
|
* @ioClose is called after parsing has finished.
|
|
|
|
*
|
|
|
|
* @ioCtxt is an opaque pointer passed to the callbacks.
|
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
|
|
|
* Returns a new parser input or NULL if a memory allocation failed.
|
2023-12-27 18:33:30 +01:00
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:14:21 +02:00
|
|
|
xmlNewInputFromIO(const char *url, xmlInputReadCallback ioRead,
|
|
|
|
xmlInputCloseCallback ioClose, void *ioCtxt,
|
|
|
|
int flags ATTRIBUTE_UNUSED) {
|
2023-12-27 18:33:30 +01:00
|
|
|
xmlParserInputBufferPtr buf;
|
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
if (ioRead == NULL)
|
2023-12-27 18:33:30 +01:00
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
|
|
|
|
if (buf == NULL) {
|
2024-01-04 23:25:06 +01:00
|
|
|
if (ioClose != NULL)
|
|
|
|
ioClose(ioCtxt);
|
2023-12-27 18:33:30 +01:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
buf->context = ioCtxt;
|
|
|
|
buf->readcallback = ioRead;
|
|
|
|
buf->closecallback = ioClose;
|
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
return(xmlNewInputInternal(buf, url));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:04:06 +02:00
|
|
|
* xmlCtxtNewInputFromIO:
|
2024-06-11 02:15:18 +02:00
|
|
|
* @ctxt: parser context
|
|
|
|
* @url: base URL (optional)
|
|
|
|
* @ioRead: read callback
|
|
|
|
* @ioClose: close callback (optional)
|
|
|
|
* @ioCtxt: IO context
|
|
|
|
* @encoding: character encoding (optional)
|
|
|
|
* @flags: unused, pass 0
|
|
|
|
*
|
|
|
|
* Returns a new parser input.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:04:06 +02:00
|
|
|
xmlCtxtNewInputFromIO(xmlParserCtxtPtr ctxt, const char *url,
|
|
|
|
xmlInputReadCallback ioRead,
|
|
|
|
xmlInputCloseCallback ioClose,
|
|
|
|
void *ioCtxt, const char *encoding, int flags) {
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlParserInputPtr input;
|
|
|
|
|
|
|
|
if ((ctxt == NULL) || (ioRead == NULL))
|
|
|
|
return(NULL);
|
|
|
|
|
2024-07-06 22:14:21 +02:00
|
|
|
input = xmlNewInputFromIO(url, ioRead, ioClose, ioCtxt, flags);
|
2024-06-11 02:15:18 +02:00
|
|
|
if (input == NULL) {
|
|
|
|
xmlCtxtErrMemory(ctxt);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (encoding != NULL)
|
|
|
|
xmlSwitchInputEncodingName(ctxt, input, encoding);
|
|
|
|
|
|
|
|
return(input);
|
2023-12-27 18:33:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2024-07-06 22:14:21 +02:00
|
|
|
* xmlNewPushInput:
|
2023-12-27 18:33:30 +01:00
|
|
|
* @url: base URL (optional)
|
|
|
|
* @chunk: pointer to char array
|
|
|
|
* @size: size of array
|
|
|
|
*
|
|
|
|
* Creates a new parser input for a push parser.
|
|
|
|
*
|
2024-06-11 16:19:58 +02:00
|
|
|
* Returns a new parser input or NULL if a memory allocation failed.
|
2023-12-27 18:33:30 +01:00
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2024-07-06 22:14:21 +02:00
|
|
|
xmlNewPushInput(const char *url, const char *chunk, int size) {
|
2023-12-27 18:33:30 +01:00
|
|
|
xmlParserInputBufferPtr buf;
|
|
|
|
xmlParserInputPtr input;
|
|
|
|
|
|
|
|
buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
|
2024-06-11 16:19:58 +02:00
|
|
|
if (buf == NULL)
|
2023-12-27 18:33:30 +01:00
|
|
|
return(NULL);
|
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
input = xmlNewInputInternal(buf, url);
|
2024-06-11 16:19:58 +02:00
|
|
|
if (input == NULL)
|
2023-12-27 18:33:30 +01:00
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
input->flags |= XML_INPUT_PROGRESSIVE;
|
|
|
|
|
|
|
|
if ((size > 0) && (chunk != NULL)) {
|
|
|
|
int res;
|
|
|
|
|
|
|
|
res = xmlParserInputBufferPush(input->buf, size, chunk);
|
|
|
|
xmlBufResetInput(input->buf->buffer, input);
|
|
|
|
if (res < 0) {
|
|
|
|
xmlFreeInputStream(input);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return(input);
|
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
|
|
|
* xmlNewIOInputStream:
|
|
|
|
* @ctxt: an XML parser context
|
2024-05-20 13:58:22 +02:00
|
|
|
* @buf: an input buffer
|
2001-02-23 17:55:21 +00:00
|
|
|
* @enc: the charset encoding if known
|
|
|
|
*
|
|
|
|
* Create a new input stream structure encapsulating the @input into
|
|
|
|
* a stream suitable for the parser.
|
|
|
|
*
|
|
|
|
* Returns the new input stream or NULL
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2023-12-27 18:33:30 +01:00
|
|
|
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr buf,
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlCharEncoding enc) {
|
2024-06-11 02:15:18 +02:00
|
|
|
xmlParserInputPtr input;
|
2023-12-27 18:33:30 +01:00
|
|
|
const char *encoding;
|
2012-07-16 16:28:47 +08:00
|
|
|
|
2024-06-28 01:41:36 +02:00
|
|
|
if ((ctxt == NULL) || (buf == NULL))
|
2023-12-27 18:33:30 +01:00
|
|
|
return(NULL);
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
input = xmlNewInputInternal(buf, NULL);
|
|
|
|
if (input == NULL) {
|
|
|
|
xmlCtxtErrMemory(ctxt);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
2023-12-27 18:33:30 +01:00
|
|
|
encoding = xmlGetCharEncodingName(enc);
|
2024-06-11 02:15:18 +02:00
|
|
|
if (encoding != NULL)
|
|
|
|
xmlSwitchInputEncodingName(ctxt, input, encoding);
|
|
|
|
|
|
|
|
return(input);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNewEntityInputStream:
|
|
|
|
* @ctxt: an XML parser context
|
2024-05-20 13:58:22 +02:00
|
|
|
* @ent: an Entity pointer
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
2023-03-13 19:38:41 +01:00
|
|
|
* DEPRECATED: Internal function, do not use.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* Create a new input stream based on an xmlEntityPtr
|
|
|
|
*
|
|
|
|
* Returns the new input stream or NULL
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
2023-12-27 15:50:58 +01:00
|
|
|
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlParserInputPtr input;
|
|
|
|
|
2023-12-27 15:50:58 +01:00
|
|
|
if ((ctxt == NULL) || (ent == NULL))
|
2001-02-23 17:55:21 +00:00
|
|
|
return(NULL);
|
2023-12-23 01:09:17 +01:00
|
|
|
|
2023-12-27 15:50:58 +01:00
|
|
|
if (ent->content != NULL) {
|
2024-07-06 22:04:06 +02:00
|
|
|
input = xmlCtxtNewInputFromString(ctxt, NULL,
|
|
|
|
(const char *) ent->content, NULL, XML_INPUT_BUF_STATIC);
|
2023-12-27 15:50:58 +01:00
|
|
|
} else if (ent->URI != NULL) {
|
2024-06-11 19:10:41 +02:00
|
|
|
xmlResourceType rtype;
|
|
|
|
|
|
|
|
if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY)
|
|
|
|
rtype = XML_RESOURCE_PARAMETER_ENTITY;
|
|
|
|
else
|
|
|
|
rtype = XML_RESOURCE_GENERAL_ENTITY;
|
|
|
|
|
|
|
|
input = xmlLoadResource(ctxt, (char *) ent->URI,
|
|
|
|
(char *) ent->ExternalID, rtype);
|
2023-12-27 15:50:58 +01:00
|
|
|
} else {
|
2024-05-10 02:04:52 +02:00
|
|
|
return(NULL);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
2023-12-27 15:50:58 +01:00
|
|
|
|
2023-12-27 18:33:30 +01:00
|
|
|
if (input == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
2023-12-27 15:50:58 +01:00
|
|
|
input->entity = ent;
|
2023-12-23 01:09:17 +01:00
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
return(input);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNewStringInputStream:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
* @buffer: an memory buffer
|
|
|
|
*
|
2024-07-06 22:14:21 +02:00
|
|
|
* DEPRECATED: Use xmlNewInputFromString.
|
2024-07-02 02:18:03 +02:00
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* Create a new input stream based on a memory buffer.
|
2023-12-27 18:33:30 +01:00
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* Returns the new input stream
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
|
|
|
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
|
2024-07-06 22:04:06 +02:00
|
|
|
return(xmlCtxtNewInputFromString(ctxt, NULL, (const char *) buffer,
|
|
|
|
NULL, 0));
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
2023-12-27 18:33:30 +01:00
|
|
|
|
2023-12-23 00:35:30 +01:00
|
|
|
/****************************************************************
|
|
|
|
* *
|
|
|
|
* External entities loading *
|
|
|
|
* *
|
|
|
|
****************************************************************/
|
|
|
|
|
|
|
|
#ifdef LIBXML_CATALOG_ENABLED

/**
 * xmlResolveResourceFromCatalog:
 * @URL:  the URL for the entity to load
 * @ID:  the System ID for the entity to load
 * @ctxt:  the context in which the entity is called or NULL
 *
 * Resolves the URL and ID against the appropriate catalog.
 * This function is used by xmlDefaultExternalEntityLoader and
 * xmlNoNetExternalEntityLoader.
 *
 * Document-local catalogs are consulted only if @ctxt carries
 * catalogs and the catalog defaults allow them. Global catalogs
 * are consulted only if the defaults allow them and @ctxt does not
 * set XML_PARSE_NO_SYS_CATALOG. No lookup happens at all if
 * catalogs are disabled or xmlNoNetExists reports that @URL
 * already exists.
 *
 * Returns a new allocated URL, or NULL.
 */
static xmlChar *
xmlResolveResourceFromCatalog(const char *URL, const char *ID,
                              xmlParserCtxtPtr ctxt) {
    xmlChar *ret = NULL;
    xmlChar *mapped = NULL;
    xmlCatalogAllow pref;
    int useLocal;
    int useGlobal;

    pref = xmlCatalogGetDefaults();

    useLocal = ((ctxt != NULL) && (ctxt->catalogs != NULL) &&
                ((pref == XML_CATA_ALLOW_ALL) ||
                 (pref == XML_CATA_ALLOW_DOCUMENT)));

    useGlobal = (((ctxt == NULL) ||
                  ((ctxt->options & XML_PARSE_NO_SYS_CATALOG) == 0)) &&
                 ((pref == XML_CATA_ALLOW_ALL) ||
                  (pref == XML_CATA_ALLOW_GLOBAL)));

    /*
     * Catalogs are only tried if they are enabled and the resource
     * doesn't already exist.
     */
    if ((pref == XML_CATA_ALLOW_NONE) || (xmlNoNetExists(URL)))
        return(NULL);

    /* Public/system identifier lookup: local first, then global. */
    if (useLocal)
        ret = xmlCatalogLocalResolve(ctxt->catalogs,
                                     (const xmlChar *) ID,
                                     (const xmlChar *) URL);
    if ((ret == NULL) && (useGlobal))
        ret = xmlCatalogResolve((const xmlChar *) ID,
                                (const xmlChar *) URL);
    if ((ret == NULL) && (URL != NULL))
        ret = xmlStrdup((const xmlChar *) URL);

    /*
     * TODO: do an URI lookup on the reference
     */
    if ((ret == NULL) || (xmlNoNetExists((const char *) ret)))
        return(ret);

    /* URI lookup on the intermediate result: local first, then global. */
    if (useLocal)
        mapped = xmlCatalogLocalResolveURI(ctxt->catalogs, ret);
    if ((mapped == NULL) && (useGlobal))
        mapped = xmlCatalogResolveURI(ret);

    if (mapped != NULL) {
        xmlFree(ret);
        ret = mapped;
    }

    return ret;
}

#endif
|
|
|
|
|
2024-06-17 13:09:37 +02:00
|
|
|
#ifdef LIBXML_HTTP_ENABLED
/*
 * Post-processes an input that reads through xmlIOHTTPRead:
 * rejects HTTP status codes of 400 and above, applies the encoding
 * announced by the server for XML MIME types, and replaces the
 * input's filename with the redirect target if there is one.
 * Inputs that are not backed by an HTTP context are left untouched.
 *
 * Returns XML_ERR_OK on success, XML_IO_LOAD_ERROR for an HTTP error
 * status, or XML_ERR_NO_MEMORY if the redirect URL can't be copied.
 */
static int
xmlCheckHTTPInputInternal(xmlParserInputPtr input) {
    void *http;
    const char *mime;
    const char *redir;

    if ((input == NULL) || (input->buf == NULL) ||
        (input->buf->readcallback != xmlIOHTTPRead) ||
        (input->buf->context == NULL))
        return(XML_ERR_OK);

    http = input->buf->context;

    if (xmlNanoHTTPReturnCode(http) >= 400) {
        /* fatal error */
        return(XML_IO_LOAD_ERROR);
    }

    mime = xmlNanoHTTPMimeType(http);
    if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
        (xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
        const char *encoding = xmlNanoHTTPEncoding(http);

        if (encoding != NULL) {
            xmlCharEncodingHandlerPtr handler;

            /* An unsupported encoding is silently ignored. */
            if (xmlOpenCharEncodingHandler(encoding, /* output */ 0,
                                           &handler) == 0)
                xmlInputSetEncodingHandler(input, handler);
        }
    }

    redir = xmlNanoHTTPRedir(http);
    if (redir == NULL)
        return(XML_ERR_OK);

    if (input->filename != NULL)
        xmlFree((xmlChar *) input->filename);
    input->filename = xmlMemStrdup(redir);
    if (input->filename == NULL)
        return(XML_ERR_NO_MEMORY);

    return(XML_ERR_OK);
}
#endif /* LIBXML_HTTP_ENABLED */
|
|
|
|
|
2023-12-23 00:35:30 +01:00
|
|
|
/**
|
|
|
|
* xmlCheckHTTPInput:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
* @ret: an XML parser input
|
|
|
|
*
|
|
|
|
* DEPRECATED: Internal function, don't use.
|
|
|
|
*
|
|
|
|
* Check an input in case it was created from an HTTP stream, in that
|
|
|
|
* case it will handle encoding and update of the base URL in case of
|
|
|
|
* redirection. It also checks for HTTP errors in which case the input
|
|
|
|
* is cleanly freed up and an appropriate error is raised in context
|
|
|
|
*
|
|
|
|
* Returns the input or NULL in case of HTTP error.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
|
|
|
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
|
|
|
|
/* Avoid unused variable warning if features are disabled. */
|
|
|
|
(void) ctxt;
|
|
|
|
|
|
|
|
#ifdef LIBXML_HTTP_ENABLED
|
2024-06-17 13:09:37 +02:00
|
|
|
{
|
|
|
|
int code = xmlCheckHTTPInputInternal(ret);
|
|
|
|
|
|
|
|
if (code != XML_ERR_OK) {
|
|
|
|
if (ret->filename != NULL)
|
2023-12-23 00:35:30 +01:00
|
|
|
xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, ret->filename);
|
2024-06-17 13:09:37 +02:00
|
|
|
else
|
2023-12-23 00:35:30 +01:00
|
|
|
xmlCtxtErrIO(ctxt, XML_IO_LOAD_ERROR, "<null>");
|
|
|
|
xmlFreeInputStream(ret);
|
2024-06-17 13:09:37 +02:00
|
|
|
return(NULL);
|
2023-12-23 00:35:30 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2024-06-17 13:09:37 +02:00
|
|
|
|
2023-12-23 00:35:30 +01:00
|
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
2024-07-06 22:14:21 +02:00
|
|
|
* xmlNewInputFromUrl:
|
2001-02-23 17:55:21 +00:00
|
|
|
* @filename: the filename to use as entity
|
2024-06-11 02:15:18 +02:00
|
|
|
* @flags: XML_INPUT flags
|
|
|
|
* @out: pointer to new parser input
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* Create a new input stream based on a file or a URL.
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* The flag XML_INPUT_UNZIP allows decompression.
|
|
|
|
*
|
2024-06-11 18:11:51 +02:00
|
|
|
* The flag XML_INPUT_NETWORK allows network access.
|
|
|
|
*
|
2024-07-02 02:18:03 +02:00
|
|
|
* The following resource loaders will be called if they were
|
|
|
|
* registered (in order of precedence):
|
|
|
|
*
|
|
|
|
* - the per-thread xmlParserInputBufferCreateFilenameFunc set with
|
|
|
|
* xmlParserInputBufferCreateFilenameDefault (deprecated)
|
|
|
|
* - the default loader which will return
|
|
|
|
* - the result from a matching global input callback set with
|
|
|
|
* xmlRegisterInputCallbacks (deprecated)
|
|
|
|
* - a HTTP resource if support is compiled in.
|
|
|
|
* - a file opened from the filesystem, with automatic detection
|
|
|
|
* of compressed files if support is compiled in.
|
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
|
|
|
* Returns an xmlParserErrors code.
|
2001-02-23 17:55:21 +00:00
|
|
|
*/
|
2024-06-11 02:15:18 +02:00
|
|
|
int
|
2024-07-06 22:14:21 +02:00
|
|
|
xmlNewInputFromUrl(const char *filename, int flags, xmlParserInputPtr *out) {
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlParserInputBufferPtr buf;
|
2024-06-11 00:52:04 +02:00
|
|
|
xmlParserInputPtr input;
|
2024-06-10 18:51:56 +02:00
|
|
|
int code = XML_ERR_OK;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2024-06-11 02:15:18 +02:00
|
|
|
if (out == NULL)
|
|
|
|
return(XML_ERR_ARGUMENT);
|
|
|
|
*out = NULL;
|
|
|
|
if (filename == NULL)
|
|
|
|
return(XML_ERR_ARGUMENT);
|
2023-12-19 15:41:37 +01:00
|
|
|
|
2024-06-10 18:51:56 +02:00
|
|
|
if (xmlParserInputBufferCreateFilenameValue != NULL) {
|
|
|
|
buf = xmlParserInputBufferCreateFilenameValue(filename,
|
|
|
|
XML_CHAR_ENCODING_NONE);
|
|
|
|
if (buf == NULL)
|
|
|
|
code = XML_IO_ENOENT;
|
|
|
|
} else {
|
2024-06-10 23:57:52 +02:00
|
|
|
code = xmlParserInputBufferCreateUrl(filename, XML_CHAR_ENCODING_NONE,
|
2024-06-11 00:00:32 +02:00
|
|
|
flags, &buf);
|
2024-06-10 18:51:56 +02:00
|
|
|
}
|
2024-06-11 02:15:18 +02:00
|
|
|
if (code != XML_ERR_OK)
|
|
|
|
return(code);
|
|
|
|
|
|
|
|
input = xmlNewInputInternal(buf, filename);
|
|
|
|
if (input == NULL)
|
|
|
|
return(XML_ERR_NO_MEMORY);
|
|
|
|
|
2024-06-17 13:09:37 +02:00
|
|
|
#ifdef LIBXML_HTTP_ENABLED
|
|
|
|
code = xmlCheckHTTPInputInternal(input);
|
|
|
|
if (code != XML_ERR_OK) {
|
|
|
|
xmlFreeInputStream(input);
|
|
|
|
return(code);
|
|
|
|
}
|
|
|
|
#endif
|
2024-06-11 02:15:18 +02:00
|
|
|
|
|
|
|
*out = input;
|
|
|
|
return(XML_ERR_OK);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNewInputFromFile:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
* @filename: the filename to use as entity
|
|
|
|
*
|
2024-07-06 22:14:21 +02:00
|
|
|
* DEPRECATED: Use xmlNewInputFromUrl.
|
2024-07-02 02:18:03 +02:00
|
|
|
*
|
2024-06-11 02:15:18 +02:00
|
|
|
* Create a new input stream based on a file or an URL.
|
|
|
|
*
|
|
|
|
* Returns the new input stream or NULL in case of error
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
|
|
|
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
|
|
|
|
xmlParserInputPtr input;
|
|
|
|
int flags = 0;
|
|
|
|
int code;
|
|
|
|
|
|
|
|
if ((ctxt == NULL) || (filename == NULL))
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
|
|
|
|
flags |= XML_INPUT_UNZIP;
|
2024-06-11 18:11:51 +02:00
|
|
|
if ((ctxt->options & XML_PARSE_NONET) == 0)
|
|
|
|
flags |= XML_INPUT_NETWORK;
|
2024-06-11 02:15:18 +02:00
|
|
|
|
2024-07-06 22:14:21 +02:00
|
|
|
code = xmlNewInputFromUrl(filename, flags, &input);
|
2024-06-10 18:51:56 +02:00
|
|
|
if (code != XML_ERR_OK) {
|
2023-12-19 19:52:28 +01:00
|
|
|
xmlCtxtErrIO(ctxt, code, filename);
|
2024-06-11 02:15:18 +02:00
|
|
|
return(NULL);
|
2003-10-27 11:25:13 +00:00
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2024-06-11 00:52:04 +02:00
|
|
|
return(input);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
2023-12-23 00:35:30 +01:00
|
|
|
/**
|
|
|
|
* xmlDefaultExternalEntityLoader:
|
|
|
|
* @URL: the URL for the entity to load
|
|
|
|
* @ID: the System ID for the entity to load
|
|
|
|
* @ctxt: the context in which the entity is called or NULL
|
|
|
|
*
|
|
|
|
* By default we don't load external entities, yet.
|
|
|
|
*
|
|
|
|
* Returns a new allocated xmlParserInputPtr, or NULL.
|
|
|
|
*/
|
|
|
|
static xmlParserInputPtr
|
2024-06-11 00:37:11 +02:00
|
|
|
xmlDefaultExternalEntityLoader(const char *url, const char *ID,
|
2023-12-23 00:35:30 +01:00
|
|
|
xmlParserCtxtPtr ctxt)
|
|
|
|
{
|
2024-06-11 00:37:11 +02:00
|
|
|
xmlParserInputPtr input = NULL;
|
|
|
|
char *resource = NULL;
|
2023-12-23 00:35:30 +01:00
|
|
|
|
2024-06-11 00:37:11 +02:00
|
|
|
(void) ID;
|
2023-12-23 00:35:30 +01:00
|
|
|
|
2024-06-11 00:37:11 +02:00
|
|
|
if (url == NULL)
|
|
|
|
return(NULL);
|
2023-12-23 00:35:30 +01:00
|
|
|
|
|
|
|
#ifdef LIBXML_CATALOG_ENABLED
|
2024-06-11 00:37:11 +02:00
|
|
|
resource = (char *) xmlResolveResourceFromCatalog(url, ID, ctxt);
|
|
|
|
if (resource != NULL)
|
|
|
|
url = resource;
|
2023-12-23 00:35:30 +01:00
|
|
|
#endif
|
|
|
|
|
2024-06-11 00:37:11 +02:00
|
|
|
if ((ctxt != NULL) &&
|
|
|
|
(ctxt->options & XML_PARSE_NONET) &&
|
2024-06-12 18:19:55 +02:00
|
|
|
(xmlStrncasecmp(BAD_CAST url, BAD_CAST "http://", 7) == 0)) {
|
2024-06-11 00:37:11 +02:00
|
|
|
xmlCtxtErrIO(ctxt, XML_IO_NETWORK_ATTEMPT, url);
|
|
|
|
} else {
|
|
|
|
input = xmlNewInputFromFile(ctxt, url);
|
|
|
|
}
|
2023-12-23 00:35:30 +01:00
|
|
|
|
2024-06-11 00:37:11 +02:00
|
|
|
if (resource != NULL)
|
|
|
|
xmlFree(resource);
|
|
|
|
return(input);
|
2023-12-23 00:35:30 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNoNetExternalEntityLoader:
|
|
|
|
* @URL: the URL for the entity to load
|
|
|
|
* @ID: the System ID for the entity to load
|
|
|
|
* @ctxt: the context in which the entity is called or NULL
|
|
|
|
*
|
2024-06-11 03:51:43 +02:00
|
|
|
* DEPRECATED: Use XML_PARSE_NONET.
|
|
|
|
*
|
2023-12-23 00:35:30 +01:00
|
|
|
* A specific entity loader disabling network accesses, though still
|
|
|
|
* allowing local catalog accesses for resolution.
|
|
|
|
*
|
|
|
|
* Returns a new allocated xmlParserInputPtr, or NULL.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
|
|
|
xmlNoNetExternalEntityLoader(const char *URL, const char *ID,
|
|
|
|
xmlParserCtxtPtr ctxt) {
|
2024-06-11 00:37:11 +02:00
|
|
|
int oldOptions = 0;
|
|
|
|
xmlParserInputPtr input;
|
2023-12-23 00:35:30 +01:00
|
|
|
|
2024-06-11 00:37:11 +02:00
|
|
|
if (ctxt != NULL) {
|
|
|
|
oldOptions = ctxt->options;
|
|
|
|
ctxt->options |= XML_PARSE_NONET;
|
|
|
|
}
|
2023-12-23 00:35:30 +01:00
|
|
|
|
2024-06-11 00:37:11 +02:00
|
|
|
input = xmlDefaultExternalEntityLoader(URL, ID, ctxt);
|
|
|
|
|
|
|
|
if (ctxt != NULL)
|
|
|
|
ctxt->options = oldOptions;
|
2023-12-23 00:35:30 +01:00
|
|
|
|
|
|
|
return(input);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This global has to die eventually
|
|
|
|
*/
|
|
|
|
static xmlExternalEntityLoader
|
|
|
|
xmlCurrentExternalEntityLoader = xmlDefaultExternalEntityLoader;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlSetExternalEntityLoader:
|
|
|
|
* @f: the new entity resolver function
|
|
|
|
*
|
2024-06-11 03:51:43 +02:00
|
|
|
* DEPRECATED: This is a global setting and not thread-safe. Use
|
|
|
|
* xmlCtxtSetResourceLoader or similar functions.
|
|
|
|
*
|
|
|
|
* Changes the default external entity resolver function for the
|
|
|
|
* application.
|
2023-12-23 00:35:30 +01:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
|
|
|
|
xmlCurrentExternalEntityLoader = f;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlGetExternalEntityLoader:
|
|
|
|
*
|
2024-06-11 03:51:43 +02:00
|
|
|
* DEPRECATED: See xmlSetExternalEntityLoader.
|
|
|
|
*
|
2023-12-23 00:35:30 +01:00
|
|
|
* Get the default external entity resolver function for the application
|
|
|
|
*
|
|
|
|
* Returns the xmlExternalEntityLoader function pointer
|
|
|
|
*/
|
|
|
|
xmlExternalEntityLoader
|
|
|
|
xmlGetExternalEntityLoader(void) {
|
|
|
|
return(xmlCurrentExternalEntityLoader);
|
|
|
|
}
|
|
|
|
|
2024-06-11 03:51:43 +02:00
|
|
|
/**
|
|
|
|
* xmlCtxtSetResourceLoader:
|
|
|
|
* @ctxt: parser context
|
|
|
|
* @loader: callback
|
|
|
|
* @vctxt: user data
|
|
|
|
*
|
|
|
|
* Installs a custom callback to load documents, DTDs or external
|
|
|
|
* entities.
|
|
|
|
*
|
|
|
|
* Available since 2.14.0.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCtxtSetResourceLoader(xmlParserCtxtPtr ctxt, xmlResourceLoader loader,
|
|
|
|
void *vctxt) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ctxt->resourceLoader = loader;
|
|
|
|
ctxt->resourceCtxt = vctxt;
|
|
|
|
}
|
|
|
|
|
2024-06-11 19:10:41 +02:00
|
|
|
/**
|
|
|
|
* xmlLoadResource:
|
|
|
|
* @ctxt: parser context
|
|
|
|
* @url: the URL for the entity to load
|
|
|
|
* @publicId: the Public ID for the entity to load
|
|
|
|
* @type: resource type
|
|
|
|
*
|
|
|
|
* Returns the xmlParserInputPtr or NULL in case of error.
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
|
|
|
xmlLoadResource(xmlParserCtxtPtr ctxt, const char *url, const char *publicId,
|
|
|
|
xmlResourceType type) {
|
|
|
|
char *canonicFilename;
|
|
|
|
xmlParserInputPtr ret;
|
|
|
|
|
|
|
|
if (url == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
if ((ctxt != NULL) && (ctxt->resourceLoader != NULL)) {
|
2024-06-17 15:29:56 +02:00
|
|
|
char *resource = NULL;
|
2024-06-11 19:10:41 +02:00
|
|
|
int flags = 0;
|
|
|
|
int code;
|
|
|
|
|
2024-07-02 01:42:33 +02:00
|
|
|
#ifdef LIBXML_CATALOG_ENABLED
|
2024-06-17 15:29:56 +02:00
|
|
|
resource = (char *) xmlResolveResourceFromCatalog(url, publicId, ctxt);
|
|
|
|
if (resource != NULL)
|
|
|
|
url = resource;
|
|
|
|
#endif
|
|
|
|
|
2024-06-11 19:10:41 +02:00
|
|
|
if ((ctxt->options & XML_PARSE_NO_UNZIP) == 0)
|
|
|
|
flags |= XML_INPUT_UNZIP;
|
|
|
|
if ((ctxt->options & XML_PARSE_NONET) == 0)
|
|
|
|
flags |= XML_INPUT_NETWORK;
|
|
|
|
|
2024-06-17 12:55:44 +02:00
|
|
|
code = ctxt->resourceLoader(ctxt->resourceCtxt, url, publicId, type,
|
|
|
|
flags, &ret);
|
2024-06-11 19:10:41 +02:00
|
|
|
if (code != XML_ERR_OK) {
|
|
|
|
xmlCtxtErrIO(ctxt, code, url);
|
2024-07-02 19:46:51 +02:00
|
|
|
ret = NULL;
|
2024-06-11 19:10:41 +02:00
|
|
|
}
|
2024-07-02 19:46:51 +02:00
|
|
|
if (resource != NULL)
|
|
|
|
xmlFree(resource);
|
2024-06-11 19:10:41 +02:00
|
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
canonicFilename = (char *) xmlCanonicPath((const xmlChar *) url);
|
|
|
|
if (canonicFilename == NULL) {
|
|
|
|
xmlCtxtErrMemory(ctxt);
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = xmlCurrentExternalEntityLoader(canonicFilename, publicId, ctxt);
|
|
|
|
xmlFree(canonicFilename);
|
|
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
2023-12-23 00:35:30 +01:00
|
|
|
/**
|
|
|
|
* xmlLoadExternalEntity:
|
|
|
|
* @URL: the URL for the entity to load
|
|
|
|
* @ID: the Public ID for the entity to load
|
|
|
|
* @ctxt: the context in which the entity is called or NULL
|
|
|
|
*
|
2024-06-10 14:04:00 +02:00
|
|
|
* @URL is a filename or URL. If if contains the substring "://",
|
|
|
|
* it is assumed to be a Legacy Extended IRI. Otherwise, it is
|
|
|
|
* treated as a filesystem path.
|
|
|
|
*
|
|
|
|
* @ID is an optional XML public ID, typically from a doctype
|
|
|
|
* declaration. It is used for catalog lookups.
|
|
|
|
*
|
2024-07-02 02:18:03 +02:00
|
|
|
* If catalog lookup is enabled (default is yes) and URL or ID are
|
|
|
|
* found in system or local XML catalogs, URL is replaced with the
|
|
|
|
* result. Then the following resource loaders will be called if
|
|
|
|
* they were registered (in order of precedence):
|
2024-06-10 14:04:00 +02:00
|
|
|
*
|
2024-06-11 03:51:43 +02:00
|
|
|
* - the resource loader set with xmlCtxtSetResourceLoader
|
2024-06-10 14:04:00 +02:00
|
|
|
* - the global external entity loader set with
|
2024-07-02 02:18:03 +02:00
|
|
|
* xmlSetExternalEntityLoader (without catalog resolution,
|
|
|
|
* deprecated)
|
2024-06-10 14:04:00 +02:00
|
|
|
* - the per-thread xmlParserInputBufferCreateFilenameFunc set with
|
2024-07-02 02:18:03 +02:00
|
|
|
* xmlParserInputBufferCreateFilenameDefault (deprecated)
|
2024-06-10 14:04:00 +02:00
|
|
|
* - the default loader which will return
|
|
|
|
* - the result from a matching global input callback set with
|
2024-07-02 02:18:03 +02:00
|
|
|
* xmlRegisterInputCallbacks (deprecated)
|
2024-06-10 14:04:00 +02:00
|
|
|
* - a HTTP resource if support is compiled in.
|
|
|
|
* - a file opened from the filesystem, with automatic detection
|
|
|
|
* of compressed files if support is compiled in.
|
|
|
|
*
|
2023-12-23 00:35:30 +01:00
|
|
|
* Returns the xmlParserInputPtr or NULL
|
|
|
|
*/
|
|
|
|
xmlParserInputPtr
|
|
|
|
xmlLoadExternalEntity(const char *URL, const char *ID,
|
|
|
|
xmlParserCtxtPtr ctxt) {
|
2024-06-11 19:10:41 +02:00
|
|
|
return(xmlLoadResource(ctxt, URL, ID, XML_RESOURCE_UNKNOWN));
|
2023-12-23 00:35:30 +01:00
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/************************************************************************
|
|
|
|
* *
|
|
|
|
* Commodity functions to handle parser contexts *
|
|
|
|
* *
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
/**
|
2022-08-24 04:21:58 +02:00
|
|
|
* xmlInitSAXParserCtxt:
|
|
|
|
* @ctxt: XML parser context
|
|
|
|
* @sax: SAX handlert
|
|
|
|
* @userData: user data
|
2001-02-23 17:55:21 +00:00
|
|
|
*
|
2022-08-24 04:21:58 +02:00
|
|
|
* Initialize a SAX parser context
|
2003-04-24 16:06:47 +00:00
|
|
|
*
|
|
|
|
* Returns 0 in case of success and -1 in case of error
|
2001-02-23 17:55:21 +00:00
|
|
|
*/
|
|
|
|
|
2022-08-24 04:21:58 +02:00
|
|
|
static int
|
2022-09-01 00:13:19 +02:00
|
|
|
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
|
2022-08-24 04:21:58 +02:00
|
|
|
void *userData)
|
2001-02-23 17:55:21 +00:00
|
|
|
{
|
2004-11-09 14:59:59 +00:00
|
|
|
xmlParserInputPtr input;
|
2024-07-06 15:48:43 +02:00
|
|
|
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
|
|
|
size_t initialNodeTabSize = 1;
|
|
|
|
#else
|
|
|
|
size_t initialNodeTabSize = 10;
|
|
|
|
#endif
|
2004-11-09 14:59:59 +00:00
|
|
|
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt == NULL)
|
2003-04-24 16:06:47 +00:00
|
|
|
return(-1);
|
2001-08-31 14:55:30 +00:00
|
|
|
|
2004-11-02 14:52:23 +00:00
|
|
|
if (ctxt->dict == NULL)
|
|
|
|
ctxt->dict = xmlDictCreate();
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt->dict == NULL)
|
2003-08-18 12:15:38 +00:00
|
|
|
return(-1);
|
2012-07-30 10:08:45 +08:00
|
|
|
|
2004-11-02 14:52:23 +00:00
|
|
|
if (ctxt->sax == NULL)
|
|
|
|
ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt->sax == NULL)
|
2003-04-24 16:06:47 +00:00
|
|
|
return(-1);
|
2022-08-24 04:21:58 +02:00
|
|
|
if (sax == NULL) {
|
|
|
|
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
|
2003-09-25 14:29:29 +00:00
|
|
|
xmlSAXVersion(ctxt->sax, 2);
|
2022-08-24 04:21:58 +02:00
|
|
|
ctxt->userData = ctxt;
|
|
|
|
} else {
|
|
|
|
if (sax->initialized == XML_SAX2_MAGIC) {
|
|
|
|
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
|
|
|
|
} else {
|
|
|
|
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
|
|
|
|
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
|
|
|
|
}
|
|
|
|
ctxt->userData = userData ? userData : ctxt;
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2003-08-19 15:01:28 +00:00
|
|
|
ctxt->maxatts = 0;
|
|
|
|
ctxt->atts = NULL;
|
2001-02-23 17:55:21 +00:00
|
|
|
/* Allocate the Input stack */
|
2004-11-02 14:52:23 +00:00
|
|
|
if (ctxt->inputTab == NULL) {
|
2024-07-06 15:48:43 +02:00
|
|
|
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
|
|
|
size_t initialSize = 1;
|
|
|
|
#else
|
|
|
|
size_t initialSize = 5;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
ctxt->inputTab = xmlMalloc(initialSize * sizeof(xmlParserInputPtr));
|
|
|
|
ctxt->inputMax = initialSize;
|
2004-11-02 14:52:23 +00:00
|
|
|
}
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt->inputTab == NULL)
|
2003-04-24 16:06:47 +00:00
|
|
|
return(-1);
|
2024-11-17 20:13:14 +01:00
|
|
|
while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
|
2004-11-09 14:59:59 +00:00
|
|
|
xmlFreeInputStream(input);
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->inputNr = 0;
|
|
|
|
ctxt->input = NULL;
|
|
|
|
|
|
|
|
ctxt->version = NULL;
|
|
|
|
ctxt->encoding = NULL;
|
|
|
|
ctxt->standalone = -1;
|
|
|
|
ctxt->hasExternalSubset = 0;
|
|
|
|
ctxt->hasPErefs = 0;
|
|
|
|
ctxt->html = 0;
|
|
|
|
ctxt->instate = XML_PARSER_START;
|
|
|
|
|
|
|
|
/* Allocate the Node stack */
|
2004-11-02 14:52:23 +00:00
|
|
|
if (ctxt->nodeTab == NULL) {
|
2024-07-06 15:48:43 +02:00
|
|
|
ctxt->nodeTab = xmlMalloc(initialNodeTabSize * sizeof(xmlNodePtr));
|
|
|
|
ctxt->nodeMax = initialNodeTabSize;
|
2004-11-02 14:52:23 +00:00
|
|
|
}
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt->nodeTab == NULL)
|
2003-04-24 16:06:47 +00:00
|
|
|
return(-1);
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->nodeNr = 0;
|
|
|
|
ctxt->node = NULL;
|
|
|
|
|
|
|
|
/* Allocate the Name stack */
|
2004-11-02 14:52:23 +00:00
|
|
|
if (ctxt->nameTab == NULL) {
|
2024-07-06 15:48:43 +02:00
|
|
|
ctxt->nameTab = xmlMalloc(initialNodeTabSize * sizeof(xmlChar *));
|
|
|
|
ctxt->nameMax = initialNodeTabSize;
|
2004-11-02 14:52:23 +00:00
|
|
|
}
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt->nameTab == NULL)
|
2003-04-24 16:06:47 +00:00
|
|
|
return(-1);
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->nameNr = 0;
|
|
|
|
ctxt->name = NULL;
|
|
|
|
|
|
|
|
/* Allocate the space stack */
|
2004-11-02 14:52:23 +00:00
|
|
|
if (ctxt->spaceTab == NULL) {
|
2024-07-06 15:48:43 +02:00
|
|
|
ctxt->spaceTab = xmlMalloc(initialNodeTabSize * sizeof(int));
|
|
|
|
ctxt->spaceMax = initialNodeTabSize;
|
2004-11-02 14:52:23 +00:00
|
|
|
}
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt->spaceTab == NULL)
|
2003-04-24 16:06:47 +00:00
|
|
|
return(-1);
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->spaceNr = 1;
|
|
|
|
ctxt->spaceTab[0] = -1;
|
|
|
|
ctxt->space = &ctxt->spaceTab[0];
|
|
|
|
ctxt->myDoc = NULL;
|
|
|
|
ctxt->wellFormed = 1;
|
2003-09-11 23:42:01 +00:00
|
|
|
ctxt->nsWellFormed = 1;
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->valid = 1;
|
2024-01-05 01:14:28 +01:00
|
|
|
|
|
|
|
ctxt->options = XML_PARSE_NODICT;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize some parser options from deprecated global variables.
|
|
|
|
* Note that the "modern" API taking options arguments or
|
|
|
|
* xmlCtxtSetOptions will ignore these defaults. They're only
|
|
|
|
* relevant if old API functions like xmlParseFile are used.
|
|
|
|
*/
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
|
2014-06-11 16:59:16 +08:00
|
|
|
if (ctxt->loadsubset) {
|
|
|
|
ctxt->options |= XML_PARSE_DTDLOAD;
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->validate = xmlDoValidityCheckingDefaultValue;
|
2024-01-05 01:14:28 +01:00
|
|
|
if (ctxt->validate) {
|
|
|
|
ctxt->options |= XML_PARSE_DTDVALID;
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->pedantic = xmlPedanticParserDefaultValue;
|
2014-06-11 16:59:16 +08:00
|
|
|
if (ctxt->pedantic) {
|
|
|
|
ctxt->options |= XML_PARSE_PEDANTIC;
|
|
|
|
}
|
2001-07-25 17:18:57 +00:00
|
|
|
ctxt->linenumbers = xmlLineNumbersDefaultValue;
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
|
2014-06-11 16:59:16 +08:00
|
|
|
if (ctxt->keepBlanks == 0) {
|
2003-09-26 14:51:39 +00:00
|
|
|
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
2014-06-11 16:59:16 +08:00
|
|
|
ctxt->options |= XML_PARSE_NOBLANKS;
|
|
|
|
}
|
2024-01-05 01:14:28 +01:00
|
|
|
ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
|
|
|
|
if (ctxt->replaceEntities) {
|
|
|
|
ctxt->options |= XML_PARSE_NOENT;
|
|
|
|
}
|
|
|
|
if (xmlGetWarningsDefaultValue == 0)
|
|
|
|
ctxt->options |= XML_PARSE_NOWARNING;
|
2001-09-14 10:29:27 +00:00
|
|
|
|
2022-01-13 17:06:14 +01:00
|
|
|
ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->vctxt.userData = ctxt;
|
2002-02-03 20:13:06 +00:00
|
|
|
ctxt->vctxt.error = xmlParserValidityError;
|
|
|
|
ctxt->vctxt.warning = xmlParserValidityWarning;
|
2024-01-05 01:14:28 +01:00
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt->record_info = 0;
|
|
|
|
ctxt->checkIndex = 0;
|
|
|
|
ctxt->inSubset = 0;
|
|
|
|
ctxt->errNo = XML_ERR_OK;
|
|
|
|
ctxt->depth = 0;
|
2001-08-22 14:29:45 +00:00
|
|
|
ctxt->catalogs = NULL;
|
2013-02-19 10:21:49 +08:00
|
|
|
ctxt->sizeentities = 0;
|
|
|
|
ctxt->sizeentcopy = 0;
|
2012-05-15 11:18:40 +08:00
|
|
|
ctxt->input_id = 1;
|
2023-08-20 20:48:10 +02:00
|
|
|
ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlInitNodeInfoSeq(&ctxt->node_seq);
|
2023-10-02 12:16:05 +02:00
|
|
|
|
|
|
|
if (ctxt->nsdb == NULL) {
|
|
|
|
ctxt->nsdb = xmlParserNsCreate();
|
2024-06-11 16:19:58 +02:00
|
|
|
if (ctxt->nsdb == NULL)
|
2023-10-02 12:16:05 +02:00
|
|
|
return(-1);
|
|
|
|
}
|
|
|
|
|
2003-04-24 16:06:47 +00:00
|
|
|
return(0);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
2022-08-24 04:21:58 +02:00
|
|
|
/**
|
|
|
|
* xmlInitParserCtxt:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
*
|
|
|
|
* DEPRECATED: Internal function which will be made private in a future
|
|
|
|
* version.
|
|
|
|
*
|
|
|
|
* Initialize a parser context
|
|
|
|
*
|
|
|
|
* Returns 0 in case of success and -1 in case of error
|
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
|
|
|
|
{
|
|
|
|
return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
|
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
|
|
|
* xmlFreeParserCtxt:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
*
|
|
|
|
* Free all the memory used by a parser context. However the parsed
|
|
|
|
* document in ctxt->myDoc is not freed.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
|
|
|
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
|
|
|
|
{
|
|
|
|
xmlParserInputPtr input;
|
|
|
|
|
|
|
|
if (ctxt == NULL) return;
|
|
|
|
|
2024-11-17 20:13:14 +01:00
|
|
|
while ((input = xmlCtxtPopInput(ctxt)) != NULL) { /* Non consuming */
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlFreeInputStream(input);
|
|
|
|
}
|
|
|
|
if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
|
2003-08-28 10:34:33 +00:00
|
|
|
if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
|
2001-02-23 17:55:21 +00:00
|
|
|
if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
|
2010-03-15 15:16:02 +01:00
|
|
|
if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
|
2001-02-23 17:55:21 +00:00
|
|
|
if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
|
|
|
|
if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
|
|
|
|
if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
|
|
|
|
if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
|
|
|
|
if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
|
2003-09-30 00:43:48 +00:00
|
|
|
#ifdef LIBXML_SAX1_ENABLED
|
2003-09-25 14:29:29 +00:00
|
|
|
if ((ctxt->sax != NULL) &&
|
|
|
|
(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
|
2003-09-30 00:43:48 +00:00
|
|
|
#else
|
|
|
|
if (ctxt->sax != NULL)
|
|
|
|
#endif /* LIBXML_SAX1_ENABLED */
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlFree(ctxt->sax);
|
2024-07-02 21:54:26 +02:00
|
|
|
if (ctxt->directory != NULL) xmlFree(ctxt->directory);
|
2001-06-19 11:07:54 +00:00
|
|
|
if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
|
2003-08-28 10:34:33 +00:00
|
|
|
if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
|
2003-08-18 12:15:38 +00:00
|
|
|
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
|
2023-09-29 00:18:44 +02:00
|
|
|
if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
|
|
|
|
if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
|
|
|
|
if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
|
2003-09-10 10:50:59 +00:00
|
|
|
if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
|
|
|
|
if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
|
2012-09-11 13:26:36 +08:00
|
|
|
if (ctxt->attsDefault != NULL)
|
2017-11-09 16:42:47 +01:00
|
|
|
xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
|
2003-09-10 10:50:59 +00:00
|
|
|
if (ctxt->attsSpecial != NULL)
|
|
|
|
xmlHashFree(ctxt->attsSpecial, NULL);
|
2003-09-17 10:26:25 +00:00
|
|
|
if (ctxt->freeElems != NULL) {
|
|
|
|
xmlNodePtr cur, next;
|
|
|
|
|
|
|
|
cur = ctxt->freeElems;
|
|
|
|
while (cur != NULL) {
|
|
|
|
next = cur->next;
|
|
|
|
xmlFree(cur);
|
|
|
|
cur = next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (ctxt->freeAttrs != NULL) {
|
|
|
|
xmlAttrPtr cur, next;
|
|
|
|
|
|
|
|
cur = ctxt->freeAttrs;
|
|
|
|
while (cur != NULL) {
|
|
|
|
next = cur->next;
|
|
|
|
xmlFree(cur);
|
|
|
|
cur = next;
|
|
|
|
}
|
|
|
|
}
|
2003-10-02 22:28:19 +00:00
|
|
|
/*
|
|
|
|
* cleanup the error strings
|
|
|
|
*/
|
|
|
|
if (ctxt->lastError.message != NULL)
|
|
|
|
xmlFree(ctxt->lastError.message);
|
|
|
|
if (ctxt->lastError.file != NULL)
|
|
|
|
xmlFree(ctxt->lastError.file);
|
|
|
|
if (ctxt->lastError.str1 != NULL)
|
|
|
|
xmlFree(ctxt->lastError.str1);
|
|
|
|
if (ctxt->lastError.str2 != NULL)
|
|
|
|
xmlFree(ctxt->lastError.str2);
|
|
|
|
if (ctxt->lastError.str3 != NULL)
|
|
|
|
xmlFree(ctxt->lastError.str3);
|
2003-09-07 09:14:37 +00:00
|
|
|
|
2001-08-22 14:29:45 +00:00
|
|
|
#ifdef LIBXML_CATALOG_ENABLED
|
|
|
|
if (ctxt->catalogs != NULL)
|
|
|
|
xmlCatalogFreeLocal(ctxt->catalogs);
|
|
|
|
#endif
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlFree(ctxt);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNewParserCtxt:
|
|
|
|
*
|
|
|
|
* Allocate and initialize a new parser context.
|
|
|
|
*
|
|
|
|
* Returns the xmlParserCtxtPtr or NULL
|
|
|
|
*/
|
|
|
|
|
|
|
|
xmlParserCtxtPtr
|
2005-07-29 22:02:24 +00:00
|
|
|
xmlNewParserCtxt(void)
|
2022-08-24 04:21:58 +02:00
|
|
|
{
|
|
|
|
return(xmlNewSAXParserCtxt(NULL, NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlNewSAXParserCtxt:
|
|
|
|
* @sax: SAX handler
|
|
|
|
* @userData: user data
|
|
|
|
*
|
2022-09-01 00:13:19 +02:00
|
|
|
* Allocate and initialize a new SAX parser context. If userData is NULL,
|
|
|
|
* the parser context will be passed as user data.
|
2022-08-24 04:21:58 +02:00
|
|
|
*
|
2023-12-27 18:33:30 +01:00
|
|
|
* Available since 2.11.0. If you want support older versions,
|
|
|
|
* it's best to invoke xmlNewParserCtxt and set ctxt->sax with
|
|
|
|
* struct assignment.
|
|
|
|
*
|
2022-09-01 00:13:19 +02:00
|
|
|
* Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
|
2022-08-24 04:21:58 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
xmlParserCtxtPtr
|
2022-09-01 00:13:19 +02:00
|
|
|
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
|
2001-02-23 17:55:21 +00:00
|
|
|
{
|
|
|
|
xmlParserCtxtPtr ctxt;
|
|
|
|
|
2023-12-27 18:33:30 +01:00
|
|
|
xmlInitParser();
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
|
2023-12-10 17:50:22 +01:00
|
|
|
if (ctxt == NULL)
|
2001-02-23 17:55:21 +00:00
|
|
|
return(NULL);
|
|
|
|
memset(ctxt, 0, sizeof(xmlParserCtxt));
|
2022-08-24 04:21:58 +02:00
|
|
|
if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
|
2003-04-24 16:06:47 +00:00
|
|
|
xmlFreeParserCtxt(ctxt);
|
|
|
|
return(NULL);
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
return(ctxt);
|
|
|
|
}
|
|
|
|
|
2024-06-26 04:32:49 +02:00
|
|
|
/**
|
|
|
|
* xmlCtxtGetPrivate:
|
2024-12-26 21:05:18 +01:00
|
|
|
* @ctxt: parser context
|
2024-06-26 04:32:49 +02:00
|
|
|
*
|
2024-06-27 16:23:14 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
2024-06-26 04:32:49 +02:00
|
|
|
* Returns the private application data.
|
|
|
|
*/
|
|
|
|
void *
|
|
|
|
xmlCtxtGetPrivate(xmlParserCtxtPtr ctxt) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
return(ctxt->_private);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlCtxtSetPrivate:
|
2024-12-26 21:05:18 +01:00
|
|
|
* @ctxt: parser context
|
|
|
|
* @priv: private application data
|
2024-06-26 04:32:49 +02:00
|
|
|
*
|
2024-06-27 16:23:14 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
2024-06-26 04:32:49 +02:00
|
|
|
* Set the private application data.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCtxtSetPrivate(xmlParserCtxtPtr ctxt, void *priv) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ctxt->_private = priv;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlCtxtGetCatalogs:
|
2024-12-26 21:05:18 +01:00
|
|
|
* @ctxt: parser context
|
2024-06-26 04:32:49 +02:00
|
|
|
*
|
2024-06-27 16:23:14 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
2024-06-26 04:32:49 +02:00
|
|
|
* Returns the local catalogs.
|
|
|
|
*/
|
|
|
|
void *
|
|
|
|
xmlCtxtGetCatalogs(xmlParserCtxtPtr ctxt) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
return(ctxt->catalogs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlCtxtSetCatalogs:
|
2024-12-26 21:05:18 +01:00
|
|
|
* @ctxt: parser context
|
|
|
|
* @catalogs: catalogs pointer
|
2024-06-26 04:32:49 +02:00
|
|
|
*
|
2024-06-27 16:23:14 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
2024-06-26 04:32:49 +02:00
|
|
|
* Set the local catalogs.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCtxtSetCatalogs(xmlParserCtxtPtr ctxt, void *catalogs) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ctxt->catalogs = catalogs;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlCtxtGetDict:
|
2024-12-26 21:05:18 +01:00
|
|
|
* @ctxt: parser context
|
2024-06-26 04:32:49 +02:00
|
|
|
*
|
2024-06-27 16:23:14 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
2024-06-26 04:32:49 +02:00
|
|
|
* Returns the dictionary.
|
|
|
|
*/
|
|
|
|
xmlDictPtr
|
|
|
|
xmlCtxtGetDict(xmlParserCtxtPtr ctxt) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return(NULL);
|
|
|
|
|
|
|
|
return(ctxt->dict);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlCtxtSetDict:
|
2024-12-26 21:05:18 +01:00
|
|
|
* @ctxt: parser context
|
|
|
|
* @dict: dictionary
|
2024-06-26 04:32:49 +02:00
|
|
|
*
|
2024-06-27 16:23:14 +02:00
|
|
|
* Available since 2.14.0.
|
|
|
|
*
|
2024-06-26 04:32:49 +02:00
|
|
|
* Set the dictionary. This should only be done immediately after
|
|
|
|
* creating a parser context.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlCtxtSetDict(xmlParserCtxtPtr ctxt, xmlDictPtr dict) {
|
|
|
|
if (ctxt == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (ctxt->dict != NULL)
|
|
|
|
xmlDictFree(ctxt->dict);
|
|
|
|
|
|
|
|
xmlDictReference(dict);
|
|
|
|
ctxt->dict = dict;
|
|
|
|
}
|
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/************************************************************************
|
|
|
|
* *
|
2020-03-08 17:19:42 +01:00
|
|
|
* Handling of node information *
|
2001-02-23 17:55:21 +00:00
|
|
|
* *
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlClearParserCtxt:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
*
|
|
|
|
* Clear (release owned resources) and reinitialize a parser context
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
|
|
|
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
|
|
|
|
{
|
2001-08-31 14:55:30 +00:00
|
|
|
if (ctxt==NULL)
|
|
|
|
return;
|
2001-02-23 17:55:21 +00:00
|
|
|
xmlClearNodeInfoSeq(&ctxt->node_seq);
|
2004-11-02 14:52:23 +00:00
|
|
|
xmlCtxtReset(ctxt);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
2004-11-05 17:22:25 +00:00
|
|
|
|
2001-02-23 17:55:21 +00:00
|
|
|
/**
|
|
|
|
* xmlParserFindNodeInfo:
|
2002-12-10 15:19:08 +00:00
|
|
|
* @ctx: an XML parser context
|
2001-02-23 17:55:21 +00:00
|
|
|
* @node: an XML node within the tree
|
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: Don't use.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* Find the parser node info struct for a given node
|
2012-09-11 13:26:36 +08:00
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* Returns an xmlParserNodeInfo block pointer or NULL
|
|
|
|
*/
|
2004-11-05 17:22:25 +00:00
|
|
|
const xmlParserNodeInfo *
|
2023-11-23 15:22:59 +01:00
|
|
|
xmlParserFindNodeInfo(xmlParserCtxtPtr ctx, xmlNodePtr node)
|
2001-02-23 17:55:21 +00:00
|
|
|
{
|
2004-11-05 17:22:25 +00:00
|
|
|
unsigned long pos;
|
|
|
|
|
|
|
|
if ((ctx == NULL) || (node == NULL))
|
|
|
|
return (NULL);
|
|
|
|
/* Find position where node should be at */
|
|
|
|
pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
|
|
|
|
if (pos < ctx->node_seq.length
|
|
|
|
&& ctx->node_seq.buffer[pos].node == node)
|
|
|
|
return &ctx->node_seq.buffer[pos];
|
|
|
|
else
|
|
|
|
return NULL;
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlInitNodeInfoSeq:
|
|
|
|
* @seq: a node info sequence pointer
|
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: Don't use.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* -- Initialize (set to initial state) node info sequence
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
|
|
|
|
{
|
2004-11-05 17:22:25 +00:00
|
|
|
if (seq == NULL)
|
|
|
|
return;
|
|
|
|
seq->length = 0;
|
|
|
|
seq->maximum = 0;
|
|
|
|
seq->buffer = NULL;
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlClearNodeInfoSeq:
|
|
|
|
* @seq: a node info sequence pointer
|
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: Don't use.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* -- Clear (release memory and reinitialize) node
|
|
|
|
* info sequence
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
|
|
|
|
{
|
2004-11-05 17:22:25 +00:00
|
|
|
if (seq == NULL)
|
|
|
|
return;
|
|
|
|
if (seq->buffer != NULL)
|
|
|
|
xmlFree(seq->buffer);
|
|
|
|
xmlInitNodeInfoSeq(seq);
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlParserFindNodeInfoIndex:
|
|
|
|
* @seq: a node info sequence pointer
|
|
|
|
* @node: an XML node pointer
|
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: Don't use.
|
2012-09-11 13:26:36 +08:00
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* xmlParserFindNodeInfoIndex : Find the index that the info record for
|
|
|
|
* the given node is or should be at in a sorted sequence
|
|
|
|
*
|
|
|
|
* Returns a long indicating the position of the record
|
|
|
|
*/
|
2004-11-05 17:22:25 +00:00
|
|
|
unsigned long
|
2023-11-23 15:22:59 +01:00
|
|
|
xmlParserFindNodeInfoIndex(xmlParserNodeInfoSeqPtr seq,
|
|
|
|
xmlNodePtr node)
|
2001-02-23 17:55:21 +00:00
|
|
|
{
|
2004-11-05 17:22:25 +00:00
|
|
|
unsigned long upper, lower, middle;
|
|
|
|
int found = 0;
|
|
|
|
|
|
|
|
if ((seq == NULL) || (node == NULL))
|
2005-12-10 11:11:12 +00:00
|
|
|
return ((unsigned long) -1);
|
2004-11-05 17:22:25 +00:00
|
|
|
|
|
|
|
/* Do a binary search for the key */
|
|
|
|
lower = 1;
|
|
|
|
upper = seq->length;
|
|
|
|
middle = 0;
|
|
|
|
while (lower <= upper && !found) {
|
|
|
|
middle = lower + (upper - lower) / 2;
|
|
|
|
if (node == seq->buffer[middle - 1].node)
|
|
|
|
found = 1;
|
|
|
|
else if (node < seq->buffer[middle - 1].node)
|
|
|
|
upper = middle - 1;
|
|
|
|
else
|
|
|
|
lower = middle + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return position */
|
|
|
|
if (middle == 0 || seq->buffer[middle - 1].node < node)
|
|
|
|
return middle;
|
2001-02-23 17:55:21 +00:00
|
|
|
else
|
2004-11-05 17:22:25 +00:00
|
|
|
return middle - 1;
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlParserAddNodeInfo:
|
|
|
|
* @ctxt: an XML parser context
|
|
|
|
* @info: a node info sequence pointer
|
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: Don't use.
|
|
|
|
*
|
2001-02-23 17:55:21 +00:00
|
|
|
* Insert node info record into the sorted sequence
|
|
|
|
*/
|
|
|
|
void
|
2002-01-23 17:53:44 +00:00
|
|
|
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
|
2023-11-23 15:22:59 +01:00
|
|
|
xmlParserNodeInfoPtr info)
|
2001-02-23 17:55:21 +00:00
|
|
|
{
|
2002-01-23 17:53:44 +00:00
|
|
|
unsigned long pos;
|
|
|
|
|
2004-11-05 17:22:25 +00:00
|
|
|
if ((ctxt == NULL) || (info == NULL)) return;
|
|
|
|
|
2002-01-23 17:53:44 +00:00
|
|
|
/* Find pos and check to see if node is already in the sequence */
|
2003-07-31 14:47:38 +00:00
|
|
|
pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
|
2002-01-23 17:53:44 +00:00
|
|
|
info->node);
|
2006-03-09 14:13:55 +00:00
|
|
|
|
2012-09-11 13:26:36 +08:00
|
|
|
if ((pos < ctxt->node_seq.length) &&
|
2006-03-09 14:13:55 +00:00
|
|
|
(ctxt->node_seq.buffer != NULL) &&
|
|
|
|
(ctxt->node_seq.buffer[pos].node == info->node)) {
|
2002-01-23 17:53:44 +00:00
|
|
|
ctxt->node_seq.buffer[pos] = *info;
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2002-01-23 17:53:44 +00:00
|
|
|
/* Otherwise, we need to add new node to buffer */
|
|
|
|
else {
|
2024-12-15 23:36:04 +01:00
|
|
|
if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
|
|
|
|
xmlParserNodeInfo *tmp;
|
|
|
|
int newSize;
|
2002-01-23 17:53:44 +00:00
|
|
|
|
2024-12-15 23:36:04 +01:00
|
|
|
newSize = xmlGrowCapacity(ctxt->node_seq.maximum, sizeof(tmp[0]),
|
|
|
|
4, XML_MAX_ITEMS);
|
|
|
|
if (newSize < 0) {
|
|
|
|
xmlCtxtErrMemory(ctxt);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
tmp = xmlRealloc(ctxt->node_seq.buffer, newSize * sizeof(tmp[0]));
|
|
|
|
if (tmp == NULL) {
|
2023-12-20 00:33:34 +01:00
|
|
|
xmlCtxtErrMemory(ctxt);
|
2002-01-23 17:53:44 +00:00
|
|
|
return;
|
|
|
|
}
|
2024-12-15 23:36:04 +01:00
|
|
|
ctxt->node_seq.buffer = tmp;
|
|
|
|
ctxt->node_seq.maximum = newSize;
|
2002-01-23 17:53:44 +00:00
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2002-01-23 17:53:44 +00:00
|
|
|
/* If position is not at end, move elements out of the way */
|
|
|
|
if (pos != ctxt->node_seq.length) {
|
|
|
|
unsigned long i;
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2002-01-23 17:53:44 +00:00
|
|
|
for (i = ctxt->node_seq.length; i > pos; i--)
|
|
|
|
ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
|
|
|
|
}
|
2001-02-23 17:55:21 +00:00
|
|
|
|
2002-01-23 17:53:44 +00:00
|
|
|
/* Copy element and increase length */
|
|
|
|
ctxt->node_seq.buffer[pos] = *info;
|
|
|
|
ctxt->node_seq.length++;
|
2001-02-23 17:55:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-07-25 17:18:57 +00:00
|
|
|
/************************************************************************
|
|
|
|
* *
|
|
|
|
* Defaults settings *
|
|
|
|
* *
|
|
|
|
************************************************************************/
|
|
|
|
/**
|
|
|
|
* xmlPedanticParserDefault:
|
2012-09-11 13:26:36 +08:00
|
|
|
* @val: int 0 or 1
|
2001-07-25 17:18:57 +00:00
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
|
|
|
|
*
|
2001-07-25 17:18:57 +00:00
|
|
|
* Set and return the previous value for enabling pedantic warnings.
|
|
|
|
*
|
|
|
|
* Returns the last value for 0 for no substitution, 1 for substitution.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
xmlPedanticParserDefault(int val) {
|
|
|
|
int old = xmlPedanticParserDefaultValue;
|
|
|
|
|
|
|
|
xmlPedanticParserDefaultValue = val;
|
|
|
|
return(old);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlLineNumbersDefault:
|
2012-09-11 13:26:36 +08:00
|
|
|
* @val: int 0 or 1
|
2001-07-25 17:18:57 +00:00
|
|
|
*
|
2022-08-24 15:55:46 +02:00
|
|
|
* DEPRECATED: The modern options API always enables line numbers.
|
|
|
|
*
|
2001-07-25 17:18:57 +00:00
|
|
|
* Set and return the previous value for enabling line numbers in elements
|
|
|
|
* contents. This may break on old application and is turned off by default.
|
|
|
|
*
|
|
|
|
* Returns the last value for 0 for no substitution, 1 for substitution.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
xmlLineNumbersDefault(int val) {
|
|
|
|
int old = xmlLineNumbersDefaultValue;
|
|
|
|
|
|
|
|
xmlLineNumbersDefaultValue = val;
|
|
|
|
return(old);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlSubstituteEntitiesDefault:
|
2012-09-11 13:26:36 +08:00
|
|
|
* @val: int 0 or 1
|
2001-07-25 17:18:57 +00:00
|
|
|
*
|
2022-08-24 15:12:24 +02:00
|
|
|
* DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
|
|
|
|
*
|
2001-07-25 17:18:57 +00:00
|
|
|
* Set and return the previous value for default entity support.
|
|
|
|
* Initially the parser always keep entity references instead of substituting
|
|
|
|
* entity values in the output. This function has to be used to change the
|
2001-12-31 16:16:02 +00:00
|
|
|
* default parser behavior
|
|
|
|
* SAX::substituteEntities() has to be used for changing that on a file by
|
2001-07-25 17:18:57 +00:00
|
|
|
* file basis.
|
|
|
|
*
|
|
|
|
* Returns the last value for 0 for no substitution, 1 for substitution.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
xmlSubstituteEntitiesDefault(int val) {
|
|
|
|
int old = xmlSubstituteEntitiesDefaultValue;
|
|
|
|
|
|
|
|
xmlSubstituteEntitiesDefaultValue = val;
|
|
|
|
return(old);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* xmlKeepBlanksDefault:
|
2012-09-11 13:26:36 +08:00
|
|
|
* @val: int 0 or 1
|
2001-07-25 17:18:57 +00:00
|
|
|
*
|
2022-08-24 15:55:46 +02:00
|
|
|
* DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
|
|
|
|
*
|
2001-07-25 17:18:57 +00:00
|
|
|
* Set and return the previous value for default blanks text nodes support.
|
|
|
|
* The 1.x version of the parser used an heuristic to try to detect
|
|
|
|
* ignorable white spaces. As a result the SAX callback was generating
|
2003-09-26 14:51:39 +00:00
|
|
|
* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
|
2001-07-25 17:18:57 +00:00
|
|
|
* using the DOM output text nodes containing those blanks were not generated.
|
|
|
|
* The 2.x and later version will switch to the XML standard way and
|
|
|
|
* ignorableWhitespace() are only generated when running the parser in
|
|
|
|
* validating mode and when the current element doesn't allow CDATA or
|
|
|
|
* mixed content.
|
2012-09-11 13:26:36 +08:00
|
|
|
* This function is provided as a way to force the standard behavior
|
2001-07-25 17:18:57 +00:00
|
|
|
* on 1.X libs and to switch back to the old mode for compatibility when
|
|
|
|
* running 1.X client code on 2.X . Upgrade of 1.X code should be done
|
|
|
|
* by using xmlIsBlankNode() commodity function to detect the "empty"
|
|
|
|
* nodes generated.
|
|
|
|
* This value also affect autogeneration of indentation when saving code
|
|
|
|
* if blanks sections are kept, indentation is not generated.
|
|
|
|
*
|
|
|
|
* Returns the last value for 0 for no substitution, 1 for substitution.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
xmlKeepBlanksDefault(int val) {
|
|
|
|
int old = xmlKeepBlanksDefaultValue;
|
|
|
|
|
|
|
|
xmlKeepBlanksDefaultValue = val;
|
2023-09-20 17:54:48 +02:00
|
|
|
#ifdef LIBXML_OUTPUT_ENABLED
|
|
|
|
if (!val)
|
|
|
|
xmlIndentTreeOutput = 1;
|
|
|
|
#endif
|
2001-07-25 17:18:57 +00:00
|
|
|
return(old);
|
|
|
|
}
|
|
|
|
|