encoding: Rework custom encoding implementation API

This commit is contained in:
Nick Wellnhofer 2025-03-09 22:20:23 +01:00
parent ba9148d8a5
commit 87c9e000e5
6 changed files with 146 additions and 137 deletions

View File

@ -354,7 +354,6 @@
<exports symbol='xmlCharEncodingHandler' type='typedef'/>
<exports symbol='xmlCharEncodingHandlerPtr' type='typedef'/>
<exports symbol='_xmlCharEncodingHandler' type='struct'/>
<exports symbol='xmlCharEncConverter' type='struct'/>
<exports symbol='xmlAddEncodingAlias' type='function'/>
<exports symbol='xmlCharEncCloseFunc' type='function'/>
<exports symbol='xmlCharEncConvCtxtDtor' type='function'/>
@ -362,6 +361,7 @@
<exports symbol='xmlCharEncConvImpl' type='function'/>
<exports symbol='xmlCharEncFirstLine' type='function'/>
<exports symbol='xmlCharEncInFunc' type='function'/>
<exports symbol='xmlCharEncNewCustomHandler' type='function'/>
<exports symbol='xmlCharEncOutFunc' type='function'/>
<exports symbol='xmlCharEncodingInputFunc' type='function'/>
<exports symbol='xmlCharEncodingOutputFunc' type='function'/>
@ -8116,11 +8116,12 @@ crash if you try to modify the tree)'/>
<arg name='vctxt' type='void *' info='conversion context'/>
</functype>
<functype name='xmlCharEncConvImpl' file='encoding' module='encoding'>
<info>vctxt: user data name: encoding name conv: pointer to xmlCharEncConverter struct If this function returns XML_ERR_OK, it must fill the @conv struct with a conversion function, and optional destructor and optional input and output conversion contexts.</info>
<info>If this function returns XML_ERR_OK, it must fill the @out pointer with an encoding handler. The handler can be obtained from xmlCharEncNewCustomHandler.</info>
<return type='int' info='an xmlParserErrors code.'/>
<arg name='vctxt' type='void *' info=''/>
<arg name='name' type='const char *' info=''/>
<arg name='conv' type='xmlCharEncConverter *' info=''/>
<arg name='vctxt' type='void *' info='user data'/>
<arg name='name' type='const char *' info='encoding name'/>
<arg name='output' type='int' info='true if output encoding, false if input'/>
<arg name='out' type='xmlCharEncodingHandler **' info='pointer to resulting handler'/>
</functype>
<function name='xmlCharEncFirstLine' file='encoding' module='encoding'>
<info>DEPRECATED: Don&apos;t use.</info>
@ -8136,6 +8137,17 @@ crash if you try to modify the tree)'/>
<arg name='out' type='xmlBufferPtr' info='an xmlBuffer for the output.'/>
<arg name='in' type='xmlBufferPtr' info='an xmlBuffer for the input'/>
</function>
<function name='xmlCharEncNewCustomHandler' file='encoding' module='encoding'>
<info>Create a custom xmlCharEncodingHandler.</info>
<return type='int' info='an xmlParserError code.'/>
<arg name='name' type='const char *' info='the encoding name'/>
<arg name='input' type='xmlCharEncConvFunc' info='input callback which converts to UTF-8'/>
<arg name='output' type='xmlCharEncConvFunc' info='output callback which converts from UTF-8'/>
<arg name='ctxtDtor' type='xmlCharEncConvCtxtDtor' info='context destructor'/>
<arg name='inputCtxt' type='void *' info='context for input callback'/>
<arg name='outputCtxt' type='void *' info='context for output callback'/>
<arg name='out' type='xmlCharEncodingHandler **' info='pointer to resulting handler'/>
</function>
<function name='xmlCharEncOutFunc' file='encoding' module='encoding'>
<info>Generic front-end for the encoding handler output function. A first call with @in == NULL has to be made first to initiate the output in case of non-stateless encodings needing to initiate their state or the output (like the BOM in UTF16). In case of UTF8 sequence conversion errors for the given encoder, the content will be automatically remapped to a CharRef sequence.</info>
<return type='int' info='the number of bytes written or an XML_ENC_ERR code.'/>
@ -12259,7 +12271,7 @@ crash if you try to modify the tree)'/>
<return type='void'/>
</function>
<functype name='xmlResourceLoader' file='parser' module='parser'>
<info>Callback for custom resource loaders. @flags can contain XML_INPUT_UNZIP and XML_INPUT_NETWORK.</info>
<info>Callback for custom resource loaders. @flags can contain XML_INPUT_UNZIP and XML_INPUT_NETWORK. On success, @out should be set to a new parser input object and XML_ERR_OK should be returned.</info>
<return type='int' info='an xmlParserError code.'/>
<arg name='ctxt' type='void *' info='parser context'/>
<arg name='url' type='const char *' info='URL to load'/>

View File

@ -239,12 +239,12 @@ static int nbCharEncodingHandler = 0;
#ifdef LIBXML_ICONV_ENABLED
static int
xmlCharEncIconv(void *vctxt, const char *name, xmlCharEncConverter *conv);
xmlCharEncIconv(const char *name, xmlCharEncodingHandler **out);
#endif
#ifdef LIBXML_ICU_ENABLED
static int
xmlCharEncUconv(void *vctxt, const char *name, xmlCharEncConverter *conv);
xmlCharEncUconv(const char *name, xmlCharEncodingHandler **out);
#endif
/************************************************************************
@ -641,6 +641,54 @@ xmlNewCharEncodingHandler(const char *name,
return(handler);
}
/**
 * xmlCharEncNewCustomHandler:
 * @name: the encoding name (may be NULL)
 * @input: input callback which converts to UTF-8
 * @output: output callback which converts from UTF-8
 * @ctxtDtor: context destructor
 * @inputCtxt: context for input callback
 * @outputCtxt: context for output callback
 * @out: pointer to resulting handler
 *
 * Create a custom xmlCharEncodingHandler.
 *
 * On success the new handler stores the callbacks, @ctxtDtor and both
 * contexts, and is returned through @out.
 *
 * If an allocation fails, @ctxtDtor (when non-NULL) is invoked on every
 * non-NULL context before returning. Callers typically create the
 * contexts right before this call and return its result directly, so
 * without this cleanup the contexts would be leaked on out-of-memory.
 * Callers must therefore not free the contexts themselves after an
 * XML_ERR_NO_MEMORY result.
 *
 * If @out is NULL, nothing is allocated or freed.
 *
 * Returns XML_ERR_OK on success, XML_ERR_ARGUMENT if @out is NULL, or
 * XML_ERR_NO_MEMORY if an allocation failed.
 */
int
xmlCharEncNewCustomHandler(const char *name,
                           xmlCharEncConvFunc input, xmlCharEncConvFunc output,
                           xmlCharEncConvCtxtDtor ctxtDtor,
                           void *inputCtxt, void *outputCtxt,
                           xmlCharEncodingHandler **out) {
    xmlCharEncodingHandler *handler;

    if (out == NULL)
        return(XML_ERR_ARGUMENT);

    handler = xmlMalloc(sizeof(*handler));
    if (handler == NULL)
        goto error;
    memset(handler, 0, sizeof(*handler));

    if (name != NULL) {
        handler->name = xmlMemStrdup(name);
        if (handler->name == NULL)
            goto error;
    }

    handler->input.func = input;
    handler->output.func = output;
    handler->ctxtDtor = ctxtDtor;
    handler->inputCtxt = inputCtxt;
    handler->outputCtxt = outputCtxt;

    *out = handler;
    return(XML_ERR_OK);

error:
    /* handler->name is always NULL here, so only the struct is freed. */
    if (handler != NULL)
        xmlFree(handler);

    /* Ownership of the contexts was handed to us; release them. */
    if (ctxtDtor != NULL) {
        if (inputCtxt != NULL)
            ctxtDtor(inputCtxt);
        if (outputCtxt != NULL)
            ctxtDtor(outputCtxt);
    }

    return(XML_ERR_NO_MEMORY);
}
/**
* xmlInitCharEncodingHandlers:
*
@ -732,25 +780,6 @@ free_handler:
}
}
/*
 * Run a conversion implementation callback and, if it reports
 * XML_ERR_OK, copy the converter it filled in (conversion functions,
 * context destructor and both contexts) into @handler.
 *
 * Returns the code reported by @impl; @handler is left untouched on
 * any code other than XML_ERR_OK.
 */
static int
xmlInvokeConvImpl(xmlCharEncConvImpl impl, void *implCtxt,
                  const char *name, xmlCharEncodingHandler *handler) {
    xmlCharEncConverter converter = { NULL, NULL, NULL, NULL, NULL };
    int code = impl(implCtxt, name, &converter);

    if (code != XML_ERR_OK)
        return(code);

    handler->input.func = converter.input;
    handler->output.func = converter.output;
    handler->ctxtDtor = converter.ctxtDtor;
    handler->inputCtxt = converter.inputCtxt;
    handler->outputCtxt = converter.outputCtxt;

    return(code);
}
/**
* xmlFindExtraHandler:
* @norig: name of the char encoding
@ -768,40 +797,21 @@ static int
xmlFindExtraHandler(const char *norig, const char *name, int output,
xmlCharEncConvImpl impl, void *implCtxt,
xmlCharEncodingHandler **out) {
xmlCharEncodingHandler *handler;
int ret;
int i;
handler = xmlMalloc(sizeof(*handler));
if (handler == NULL)
return(XML_ERR_NO_MEMORY);
memset(handler, 0, sizeof(*handler));
handler->name = xmlMemStrdup(name);
if (handler->name == NULL) {
ret = XML_ERR_NO_MEMORY;
goto done;
}
/*
* Try custom implementation before deprecated global handlers.
*
* Note that we pass the original name without deprecated
* alias resolution.
*/
if (impl != NULL) {
ret = xmlInvokeConvImpl(impl, implCtxt, norig, handler);
if (ret != XML_ERR_OK)
goto done;
*out = handler;
return(XML_ERR_OK);
}
if (impl != NULL)
return(impl(implCtxt, norig, output, out));
/*
* Deprecated
*/
if (globalHandlers != NULL) {
int i;
for (i = 0; i < nbCharEncodingHandler; i++) {
xmlCharEncodingHandler *h = globalHandlers[i];
@ -809,42 +819,35 @@ xmlFindExtraHandler(const char *norig, const char *name, int output,
(const xmlChar *) h->name)) {
if ((output ? h->output.func : h->input.func) != NULL) {
*out = h;
ret = XML_ERR_OK;
goto done;
return(XML_ERR_OK);
}
}
}
}
#ifdef LIBXML_ICONV_ENABLED
ret = xmlInvokeConvImpl(xmlCharEncIconv, handler, name, handler);
if (ret == XML_ERR_OK) {
*out = handler;
{
int ret = xmlCharEncIconv(name, out);
if (ret == XML_ERR_OK)
return(XML_ERR_OK);
}
if (ret != XML_ERR_UNSUPPORTED_ENCODING)
goto done;
return(ret);
}
#endif /* LIBXML_ICONV_ENABLED */
#ifdef LIBXML_ICU_ENABLED
ret = xmlInvokeConvImpl(xmlCharEncUconv, handler, name, handler);
if (ret == XML_ERR_OK) {
*out = handler;
{
int ret = xmlCharEncUconv(name, out);
if (ret == XML_ERR_OK)
return(XML_ERR_OK);
}
if (ret != XML_ERR_UNSUPPORTED_ENCODING)
goto done;
return(ret);
}
#endif /* LIBXML_ICU_ENABLED */
ret = XML_ERR_UNSUPPORTED_ENCODING;
done:
if (handler != NULL) {
xmlFree(handler->name);
xmlFree(handler);
}
return(ret);
return(XML_ERR_UNSUPPORTED_ENCODING);
}
/**
@ -1149,8 +1152,7 @@ xmlEncodingMatch(const char *name1, const char *name2) {
#endif /* FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */
static int
xmlCharEncIconv(void *vctxt ATTRIBUTE_UNUSED, const char *name,
xmlCharEncConverter *conv) {
xmlCharEncIconv(const char *name, xmlCharEncodingHandler **out) {
xmlIconvCtxt *inputCtxt = NULL, *outputCtxt = NULL;
iconv_t icv_in;
iconv_t icv_out;
@ -1241,13 +1243,9 @@ xmlCharEncIconv(void *vctxt ATTRIBUTE_UNUSED, const char *name,
}
outputCtxt->cd = icv_out;
conv->input = xmlIconvConvert;
conv->output = xmlIconvConvert;
conv->ctxtDtor = xmlIconvFree;
conv->inputCtxt = inputCtxt;
conv->outputCtxt = outputCtxt;
return(XML_ERR_OK);
return(xmlCharEncNewCustomHandler(name, xmlIconvConvert, xmlIconvConvert,
xmlIconvFree, inputCtxt, outputCtxt,
out));
error:
if (inputCtxt != NULL)
@ -1436,8 +1434,7 @@ xmlUconvFree(void *vctxt) {
}
static int
xmlCharEncUconv(void *vctxt ATTRIBUTE_UNUSED, const char *name,
xmlCharEncConverter *conv) {
xmlCharEncUconv(const char *name, xmlCharEncodingHandler **out) {
xmlUconvCtxt *ucv_in = NULL;
xmlUconvCtxt *ucv_out = NULL;
int ret;
@ -1449,13 +1446,9 @@ xmlCharEncUconv(void *vctxt ATTRIBUTE_UNUSED, const char *name,
if (ret != 0)
goto error;
conv->input = xmlUconvConvert;
conv->output = xmlUconvConvert;
conv->ctxtDtor = xmlUconvFree;
conv->inputCtxt = ucv_in;
conv->outputCtxt = ucv_out;
return(XML_ERR_OK);
return(xmlCharEncNewCustomHandler(name, xmlUconvConvert, xmlUconvConvert,
xmlUconvFree, ucv_in, ucv_out,
out));
error:
if (ucv_in != NULL)

View File

@ -174,8 +174,8 @@ icuConvCtxtDtor(void *vctxt) {
}
static int
icuConvImpl(void *vctxt, const char *name,
xmlCharEncConverter *conv) {
icuConvImpl(void *vctxt, const char *name, int output,
xmlCharEncodingHandler **out) {
myConvCtxt *inputCtxt = NULL;
myConvCtxt *outputCtxt = NULL;
int ret;
@ -187,13 +187,9 @@ icuConvImpl(void *vctxt, const char *name,
if (ret != 0)
goto error;
conv->input = icuConvert;
conv->output = icuConvert;
conv->ctxtDtor = icuConvCtxtDtor;
conv->inputCtxt = inputCtxt;
conv->outputCtxt = outputCtxt;
return XML_ERR_OK;
return xmlCharEncNewCustomHandler(name, icuConvert, icuConvert,
icuConvCtxtDtor, inputCtxt, outputCtxt,
out);
error:
if (inputCtxt != NULL)

View File

@ -147,30 +147,6 @@ typedef int
typedef void
(*xmlCharEncConvCtxtDtor)(void *vctxt);
/*
 * Description of a converter filled in by an xmlCharEncConvImpl
 * callback: conversion functions, an optional context destructor and
 * optional input and output conversion contexts.
 */
typedef struct {
/* conversion function installed as the handler's input function */
xmlCharEncConvFunc input;
/* conversion function installed as the handler's output function */
xmlCharEncConvFunc output;
/* optional destructor for the conversion contexts */
xmlCharEncConvCtxtDtor ctxtDtor;
/* optional context for the input conversion */
void *inputCtxt;
/* optional context for the output conversion */
void *outputCtxt;
} xmlCharEncConverter;
/**
 * xmlCharEncConvImpl:
 * @vctxt: user data
 * @name: encoding name
 * @conv: pointer to xmlCharEncConverter struct
 *
 * If this function returns XML_ERR_OK, it must fill the @conv struct
 * with a conversion function, and optional destructor and optional
 * input and output conversion contexts.
 *
 * Returns an xmlParserErrors code.
 */
typedef int
(*xmlCharEncConvImpl)(void *vctxt, const char *name,
xmlCharEncConverter *conv);
/*
* Block defining the handlers for non UTF-8 encodings.
*
@ -194,6 +170,23 @@ struct _xmlCharEncodingHandler {
int flags XML_DEPRECATED_MEMBER;
};
/**
 * xmlCharEncConvImpl:
 * @vctxt: user data
 * @name: encoding name
 * @output: true if output encoding, false if input
 * @out: pointer to resulting handler
 *
 * Callback type for looking up a custom encoding implementation.
 *
 * If this function returns XML_ERR_OK, it must fill the @out
 * pointer with an encoding handler. The handler can be obtained
 * from xmlCharEncNewCustomHandler.
 *
 * Any other return value is passed back unchanged to the code that
 * requested the handler.
 *
 * Returns an xmlParserErrors code.
 */
typedef int
(*xmlCharEncConvImpl)(void *vctxt, const char *name, int output,
xmlCharEncodingHandler **out);
/*
* Interfaces for encoding handlers.
*/
@ -226,6 +219,14 @@ XMLPUBFUN xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler (const char *name,
xmlCharEncodingInputFunc input,
xmlCharEncodingOutputFunc output);
XMLPUBFUN int
xmlCharEncNewCustomHandler (const char *name,
xmlCharEncConvFunc input,
xmlCharEncConvFunc output,
xmlCharEncConvCtxtDtor ctxtDtor,
void *inputCtxt,
void *outputCtxt,
xmlCharEncodingHandler **out);
/*
* Interfaces for encoding names and aliases.

View File

@ -8073,6 +8073,16 @@ test_xmlCharEncInFunc(void) {
}
/*
 * Placeholder test for xmlCharEncNewCustomHandler: nothing is exercised
 * yet, so it always reports zero failures.
 */
static int
test_xmlCharEncNewCustomHandler(void) {
    /* missing type support */
    return(0);
}
static int
test_xmlCharEncOutFunc(void) {
int test_ret = 0;
@ -8544,11 +8554,12 @@ static int
test_encoding(void) {
int test_ret = 0;
if (quiet == 0) printf("Testing encoding : 16 of 22 functions ...\n");
if (quiet == 0) printf("Testing encoding : 16 of 23 functions ...\n");
test_ret += test_xmlAddEncodingAlias();
test_ret += test_xmlCharEncCloseFunc();
test_ret += test_xmlCharEncFirstLine();
test_ret += test_xmlCharEncInFunc();
test_ret += test_xmlCharEncNewCustomHandler();
test_ret += test_xmlCharEncOutFunc();
test_ret += test_xmlCleanupCharEncodingHandlers();
test_ret += test_xmlCleanupEncodingAliases();

View File

@ -987,26 +987,22 @@ rot13ConvCtxtDtor(void *vctxt) {
}
static int
rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name,
xmlCharEncConverter *conv) {
rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name, int output,
xmlCharEncodingHandler **out) {
int *inputCtxt;
if (strcmp(name, "rot13") != 0) {
fprintf(stderr, "rot13ConvImpl received wrong name\n");
charEncConvImplError = 1;
if (strcmp(name, "rot13") != 0)
return xmlCreateCharEncodingHandler(name, output, NULL, NULL, out);
if (output)
return XML_ERR_UNSUPPORTED_ENCODING;
}
conv->input = rot13Convert;
conv->output = rot13Convert;
conv->ctxtDtor = rot13ConvCtxtDtor;
inputCtxt = xmlMalloc(sizeof(*inputCtxt));
*inputCtxt = 13;
conv->inputCtxt = inputCtxt;
return XML_ERR_OK;
return xmlCharEncNewCustomHandler(name, rot13Convert, rot13Convert,
rot13ConvCtxtDtor, inputCtxt, NULL,
out);
}
static int