From 87c9e000e52e2aa2e15d44e1aede2b811b571996 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 9 Mar 2025 22:20:23 +0100 Subject: [PATCH] encoding: Rework custom encoding implementation API --- doc/libxml2-api.xml | 24 ++++-- encoding.c | 163 ++++++++++++++++++-------------------- example/icu.c | 14 ++-- include/libxml/encoding.h | 49 ++++++------ testapi.c | 13 ++- testparser.c | 20 ++--- 6 files changed, 146 insertions(+), 137 deletions(-) diff --git a/doc/libxml2-api.xml b/doc/libxml2-api.xml index 980e755e..82593e21 100644 --- a/doc/libxml2-api.xml +++ b/doc/libxml2-api.xml @@ -354,7 +354,6 @@ - @@ -362,6 +361,7 @@ + @@ -8116,11 +8116,12 @@ crash if you try to modify the tree)'/> - vctxt: user data name: encoding name conv: pointer to xmlCharEncConverter struct If this function returns XML_ERR_OK, it must fill the @conv struct with a conversion function, and optional destructor and optional input and output conversion contexts. + If this function returns XML_ERR_OK, it must fill the @out pointer with an encoding handler. The handler can be obtained from xmlCharEncNewCustomHandler. - - - + + + + DEPERECATED: Don't use. @@ -8136,6 +8137,17 @@ crash if you try to modify the tree)'/> + + Create a custom xmlCharEncodingHandler. + + + + + + + + + Generic front-end for the encoding handler output function a first call with @in == NULL has to be made firs to initiate the output in case of non-stateless encoding needing to initiate their state or the output (like the BOM in UTF16). In case of UTF8 sequence conversion errors for the given encoder, the content will be automatically remapped to a CharRef sequence. @@ -12259,7 +12271,7 @@ crash if you try to modify the tree)'/> - Callback for custom resource loaders. @flags can contain XML_INPUT_UNZIP and XML_INPUT_NETWORK. + Callback for custom resource loaders. @flags can contain XML_INPUT_UNZIP and XML_INPUT_NETWORK. On success, @out should be set to a new parser input object and XML_ERR_OK should be returned. 
diff --git a/encoding.c b/encoding.c index 86bb7a28..3ac9e427 100644 --- a/encoding.c +++ b/encoding.c @@ -239,12 +239,12 @@ static int nbCharEncodingHandler = 0; #ifdef LIBXML_ICONV_ENABLED static int -xmlCharEncIconv(void *vctxt, const char *name, xmlCharEncConverter *conv); +xmlCharEncIconv(const char *name, xmlCharEncodingHandler **out); #endif #ifdef LIBXML_ICU_ENABLED static int -xmlCharEncUconv(void *vctxt, const char *name, xmlCharEncConverter *conv); +xmlCharEncUconv(const char *name, xmlCharEncodingHandler **out); #endif /************************************************************************ @@ -641,6 +641,54 @@ xmlNewCharEncodingHandler(const char *name, return(handler); } +/** + * xmlCharEncNewCustomHandler: + * @name: the encoding name + * @input: input callback which converts to UTF-8 + * @output: output callback which converts from UTF-8 + * @ctxtDtor: context destructor + * @inputCtxt: context for input callback + * @outputCtxt: context for output callback + * @out: pointer to resulting handler + * + * Create a custom xmlCharEncodingHandler. + * + * Returns an xmlParserErrors code. 
+ */ +int +xmlCharEncNewCustomHandler(const char *name, + xmlCharEncConvFunc input, xmlCharEncConvFunc output, + xmlCharEncConvCtxtDtor ctxtDtor, + void *inputCtxt, void *outputCtxt, + xmlCharEncodingHandler **out) { + xmlCharEncodingHandler *handler; + + if (out == NULL) + return(XML_ERR_ARGUMENT); + + handler = xmlMalloc(sizeof(*handler)); + if (handler == NULL) + return(XML_ERR_NO_MEMORY); + memset(handler, 0, sizeof(*handler)); + + if (name != NULL) { + handler->name = xmlMemStrdup(name); + if (handler->name == NULL) { + xmlFree(handler); + return(XML_ERR_NO_MEMORY); + } + } + + handler->input.func = input; + handler->output.func = output; + handler->ctxtDtor = ctxtDtor; + handler->inputCtxt = inputCtxt; + handler->outputCtxt = outputCtxt; + + *out = handler; + return(XML_ERR_OK); +} + /** * xmlInitCharEncodingHandlers: * @@ -732,25 +780,6 @@ free_handler: } } -static int -xmlInvokeConvImpl(xmlCharEncConvImpl impl, void *implCtxt, - const char *name, xmlCharEncodingHandler *handler) { - xmlCharEncConverter conv = { NULL, NULL, NULL, NULL, NULL }; - int ret; - - ret = impl(implCtxt, name, &conv); - - if (ret == XML_ERR_OK) { - handler->input.func = conv.input; - handler->output.func = conv.output; - handler->ctxtDtor = conv.ctxtDtor; - handler->inputCtxt = conv.inputCtxt; - handler->outputCtxt = conv.outputCtxt; - } - - return(ret); -} - /** * xmlFindExtraHandler: * @norig: name of the char encoding @@ -768,40 +797,21 @@ static int xmlFindExtraHandler(const char *norig, const char *name, int output, xmlCharEncConvImpl impl, void *implCtxt, xmlCharEncodingHandler **out) { - xmlCharEncodingHandler *handler; - int ret; - int i; - - handler = xmlMalloc(sizeof(*handler)); - if (handler == NULL) - return(XML_ERR_NO_MEMORY); - memset(handler, 0, sizeof(*handler)); - - handler->name = xmlMemStrdup(name); - if (handler->name == NULL) { - ret = XML_ERR_NO_MEMORY; - goto done; - } - /* * Try custom implementation before deprecated global handlers. 
* * Note that we pass the original name without deprecated * alias resolution. */ - if (impl != NULL) { - ret = xmlInvokeConvImpl(impl, implCtxt, norig, handler); - if (ret != XML_ERR_OK) - goto done; - - *out = handler; - return(XML_ERR_OK); - } + if (impl != NULL) + return(impl(implCtxt, norig, output, out)); /* * Deprecated */ if (globalHandlers != NULL) { + int i; + for (i = 0; i < nbCharEncodingHandler; i++) { xmlCharEncodingHandler *h = globalHandlers[i]; @@ -809,42 +819,35 @@ xmlFindExtraHandler(const char *norig, const char *name, int output, (const xmlChar *) h->name)) { if ((output ? h->output.func : h->input.func) != NULL) { *out = h; - ret = XML_ERR_OK; - goto done; + return(XML_ERR_OK); } } } } #ifdef LIBXML_ICONV_ENABLED - ret = xmlInvokeConvImpl(xmlCharEncIconv, handler, name, handler); - if (ret == XML_ERR_OK) { - *out = handler; - return(XML_ERR_OK); + { + int ret = xmlCharEncIconv(name, out); + + if (ret == XML_ERR_OK) + return(XML_ERR_OK); + if (ret != XML_ERR_UNSUPPORTED_ENCODING) + return(ret); } - if (ret != XML_ERR_UNSUPPORTED_ENCODING) - goto done; #endif /* LIBXML_ICONV_ENABLED */ #ifdef LIBXML_ICU_ENABLED - ret = xmlInvokeConvImpl(xmlCharEncUconv, handler, name, handler); - if (ret == XML_ERR_OK) { - *out = handler; - return(XML_ERR_OK); + { + int ret = xmlCharEncUconv(name, out); + + if (ret == XML_ERR_OK) + return(XML_ERR_OK); + if (ret != XML_ERR_UNSUPPORTED_ENCODING) + return(ret); } - if (ret != XML_ERR_UNSUPPORTED_ENCODING) - goto done; #endif /* LIBXML_ICU_ENABLED */ - ret = XML_ERR_UNSUPPORTED_ENCODING; - -done: - if (handler != NULL) { - xmlFree(handler->name); - xmlFree(handler); - } - - return(ret); + return(XML_ERR_UNSUPPORTED_ENCODING); } /** @@ -1149,8 +1152,7 @@ xmlEncodingMatch(const char *name1, const char *name2) { #endif /* FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */ static int -xmlCharEncIconv(void *vctxt ATTRIBUTE_UNUSED, const char *name, - xmlCharEncConverter *conv) { +xmlCharEncIconv(const char *name, 
xmlCharEncodingHandler **out) { xmlIconvCtxt *inputCtxt = NULL, *outputCtxt = NULL; iconv_t icv_in; iconv_t icv_out; @@ -1241,13 +1243,9 @@ xmlCharEncIconv(void *vctxt ATTRIBUTE_UNUSED, const char *name, } outputCtxt->cd = icv_out; - conv->input = xmlIconvConvert; - conv->output = xmlIconvConvert; - conv->ctxtDtor = xmlIconvFree; - conv->inputCtxt = inputCtxt; - conv->outputCtxt = outputCtxt; - - return(XML_ERR_OK); + return(xmlCharEncNewCustomHandler(name, xmlIconvConvert, xmlIconvConvert, + xmlIconvFree, inputCtxt, outputCtxt, + out)); error: if (inputCtxt != NULL) @@ -1436,8 +1434,7 @@ xmlUconvFree(void *vctxt) { } static int -xmlCharEncUconv(void *vctxt ATTRIBUTE_UNUSED, const char *name, - xmlCharEncConverter *conv) { +xmlCharEncUconv(const char *name, xmlCharEncodingHandler **out) { xmlUconvCtxt *ucv_in = NULL; xmlUconvCtxt *ucv_out = NULL; int ret; @@ -1449,13 +1446,9 @@ xmlCharEncUconv(void *vctxt ATTRIBUTE_UNUSED, const char *name, if (ret != 0) goto error; - conv->input = xmlUconvConvert; - conv->output = xmlUconvConvert; - conv->ctxtDtor = xmlUconvFree; - conv->inputCtxt = ucv_in; - conv->outputCtxt = ucv_out; - - return(XML_ERR_OK); + return(xmlCharEncNewCustomHandler(name, xmlUconvConvert, xmlUconvConvert, + xmlUconvFree, ucv_in, ucv_out, + out)); error: if (ucv_in != NULL) diff --git a/example/icu.c b/example/icu.c index 80c9637b..0e93b671 100644 --- a/example/icu.c +++ b/example/icu.c @@ -174,8 +174,8 @@ icuConvCtxtDtor(void *vctxt) { } static int -icuConvImpl(void *vctxt, const char *name, - xmlCharEncConverter *conv) { +icuConvImpl(void *vctxt, const char *name, int output, + xmlCharEncodingHandler **out) { myConvCtxt *inputCtxt = NULL; myConvCtxt *outputCtxt = NULL; int ret; @@ -187,13 +187,9 @@ icuConvImpl(void *vctxt, const char *name, if (ret != 0) goto error; - conv->input = icuConvert; - conv->output = icuConvert; - conv->ctxtDtor = icuConvCtxtDtor; - conv->inputCtxt = inputCtxt; - conv->outputCtxt = outputCtxt; - - return XML_ERR_OK; + 
return xmlCharEncNewCustomHandler(name, icuConvert, icuConvert, + icuConvCtxtDtor, inputCtxt, outputCtxt, + out); error: if (inputCtxt != NULL) diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h index edf0b00a..8a3cddd8 100644 --- a/include/libxml/encoding.h +++ b/include/libxml/encoding.h @@ -147,30 +147,6 @@ typedef int typedef void (*xmlCharEncConvCtxtDtor)(void *vctxt); -typedef struct { - xmlCharEncConvFunc input; - xmlCharEncConvFunc output; - xmlCharEncConvCtxtDtor ctxtDtor; - void *inputCtxt; - void *outputCtxt; -} xmlCharEncConverter; - -/** - * xmlCharEncConvImpl: - * vctxt: user data - * name: encoding name - * conv: pointer to xmlCharEncConverter struct - * - * If this function returns XML_ERR_OK, it must fill the @conv struct - * with a conversion function, and optional destructor and optional - * input and output conversion contexts. - * - * Returns an xmlParserErrors code. - */ -typedef int -(*xmlCharEncConvImpl)(void *vctxt, const char *name, - xmlCharEncConverter *conv); - /* * Block defining the handlers for non UTF-8 encodings. * @@ -194,6 +170,23 @@ struct _xmlCharEncodingHandler { int flags XML_DEPRECATED_MEMBER; }; +/** + * xmlCharEncConvImpl: + * @vctxt: user data + * @name: encoding name + * @output: true if output encoding, false if input + * @out: pointer to resulting handler + * + * If this function returns XML_ERR_OK, it must fill the @out + * pointer with an encoding handler. The handler can be obtained + * from xmlCharEncNewCustomHandler. + * + * Returns an xmlParserErrors code. + */ +typedef int +(*xmlCharEncConvImpl)(void *vctxt, const char *name, int output, + xmlCharEncodingHandler **out); + /* * Interfaces for encoding handlers. 
*/ @@ -226,6 +219,14 @@ XMLPUBFUN xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler (const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output); +XMLPUBFUN int + xmlCharEncNewCustomHandler (const char *name, + xmlCharEncConvFunc input, + xmlCharEncConvFunc output, + xmlCharEncConvCtxtDtor ctxtDtor, + void *inputCtxt, + void *outputCtxt, + xmlCharEncodingHandler **out); /* * Interfaces for encoding names and aliases. diff --git a/testapi.c b/testapi.c index 72d82a77..5270f5b9 100644 --- a/testapi.c +++ b/testapi.c @@ -8073,6 +8073,16 @@ test_xmlCharEncInFunc(void) { } +static int +test_xmlCharEncNewCustomHandler(void) { + int test_ret = 0; + + + /* missing type support */ + return(test_ret); +} + + static int test_xmlCharEncOutFunc(void) { int test_ret = 0; @@ -8544,11 +8554,12 @@ static int test_encoding(void) { int test_ret = 0; - if (quiet == 0) printf("Testing encoding : 16 of 22 functions ...\n"); + if (quiet == 0) printf("Testing encoding : 16 of 23 functions ...\n"); test_ret += test_xmlAddEncodingAlias(); test_ret += test_xmlCharEncCloseFunc(); test_ret += test_xmlCharEncFirstLine(); test_ret += test_xmlCharEncInFunc(); + test_ret += test_xmlCharEncNewCustomHandler(); test_ret += test_xmlCharEncOutFunc(); test_ret += test_xmlCleanupCharEncodingHandlers(); test_ret += test_xmlCleanupEncodingAliases(); diff --git a/testparser.c b/testparser.c index 4004ac8b..2f2456e0 100644 --- a/testparser.c +++ b/testparser.c @@ -987,26 +987,22 @@ rot13ConvCtxtDtor(void *vctxt) { } static int -rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name, - xmlCharEncConverter *conv) { +rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name, int output, + xmlCharEncodingHandler **out) { int *inputCtxt; - if (strcmp(name, "rot13") != 0) { - fprintf(stderr, "rot13ConvImpl received wrong name\n"); - charEncConvImplError = 1; + if (strcmp(name, "rot13") != 0) + return xmlCreateCharEncodingHandler(name, output, NULL, NULL, out); + if (output) 
return XML_ERR_UNSUPPORTED_ENCODING; - } - conv->input = rot13Convert; - conv->output = rot13Convert; - conv->ctxtDtor = rot13ConvCtxtDtor; - inputCtxt = xmlMalloc(sizeof(*inputCtxt)); *inputCtxt = 13; - conv->inputCtxt = inputCtxt; - return XML_ERR_OK; + return xmlCharEncNewCustomHandler(name, rot13Convert, rot13Convert, + rot13ConvCtxtDtor, inputCtxt, NULL, + out); } static int