From 4365a5e1153dd5ed7d269d00f8f14daee0fac5c8 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 26 Feb 2024 15:14:28 +0100 Subject: [PATCH] xmlreader: Fix xmlTextReaderConstEncoding Regression from commit f1c1f5c6. Fixes #697. --- SAX2.c | 12 +----------- include/private/parser.h | 2 ++ parserInternals.c | 24 ++++++++++++++++++++++++ xmlreader.c | 24 +++++++++++------------- 4 files changed, 38 insertions(+), 24 deletions(-) diff --git a/SAX2.c b/SAX2.c index ed21a559..bb72e160 100644 --- a/SAX2.c +++ b/SAX2.c @@ -955,17 +955,7 @@ xmlSAX2EndDocument(void *ctx) doc = ctxt->myDoc; if ((doc != NULL) && (doc->encoding == NULL)) { - const xmlChar *encoding = NULL; - - if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) || - (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) { - /* Preserve encoding exactly */ - encoding = ctxt->encoding; - } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) { - encoding = BAD_CAST ctxt->input->buf->encoder->name; - } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) { - encoding = BAD_CAST "UTF-8"; - } + const xmlChar *encoding = xmlGetActualEncoding(ctxt); if (encoding != NULL) { doc->encoding = xmlStrdup(encoding); diff --git a/include/private/parser.h b/include/private/parser.h index 40d179fe..7f8f6912 100644 --- a/include/private/parser.h +++ b/include/private/parser.h @@ -48,6 +48,8 @@ XML_HIDDEN void xmlDetectEncoding(xmlParserCtxtPtr ctxt); XML_HIDDEN void xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding); +XML_HIDDEN const xmlChar * +xmlGetActualEncoding(xmlParserCtxtPtr ctxt); XML_HIDDEN xmlParserNsData * xmlParserNsCreate(void); diff --git a/parserInternals.c b/parserInternals.c index e6b4cb14..166397bd 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1479,6 +1479,30 @@ xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) { } } +/** + * xmlGetActualEncoding: + * @ctxt: the parser context + * + * Returns the actual used to parse the document. This can differ from + * the declared encoding. + */ +const xmlChar * +xmlGetActualEncoding(xmlParserCtxtPtr ctxt) { + const xmlChar *encoding = NULL; + + if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) || + (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) { + /* Preserve encoding exactly */ + encoding = ctxt->encoding; + } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) { + encoding = BAD_CAST ctxt->input->buf->encoder->name; + } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) { + encoding = BAD_CAST "UTF-8"; + } + + return(encoding); +} + /************************************************************************ * * * Commodity functions to handle entities processing * diff --git a/xmlreader.c b/xmlreader.c index 1f903306..5fdeb2b8 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -40,6 +40,7 @@ #endif #include "private/buf.h" +#include "private/parser.h" #include "private/tree.h" #ifdef LIBXML_XINCLUDE_ENABLED #include "private/xinclude.h" @@ -2795,20 +2796,17 @@ xmlTextReaderReadAttributeValue(xmlTextReaderPtr reader) { */ const xmlChar * xmlTextReaderConstEncoding(xmlTextReaderPtr reader) { - xmlDocPtr doc = NULL; - if (reader == NULL) - return(NULL); - if (reader->doc != NULL) - doc = reader->doc; - else if (reader->ctxt != NULL) - doc = reader->ctxt->myDoc; - if (doc == NULL) - return(NULL); + const xmlChar *encoding = NULL; - if (doc->encoding == NULL) - return(NULL); - else - return(CONSTSTR(doc->encoding)); + if (reader == NULL) + return(NULL); + + if (reader->ctxt != NULL) + encoding = xmlGetActualEncoding(reader->ctxt); + else if (reader->doc != NULL) + encoding = reader->doc->encoding; + + return(CONSTSTR(encoding)); }