libxml2/testparser.c
Nick Wellnhofer 282ec1d548 encoding: Rework xmlCharEncodingHandler layout
Reuse some of the old members.

The "input" and "output" function pointers are actually of type
xmlCharEncConvFunc, accepting an additional argument. For default
handlers, this argument is unused, so this should work with most ABIs.
For iconv handlers, these function pointers used to be NULL but now
point to a function which requires the extra argument.

"iconv_in" and "iconv_out" are made void pointers. "uconv_in" and
"uconv_out" are renamed and made void pointers. This is unlikely to
cause issues.

We now expect that the built-in conversion functions correctly report
XML_ENC_ERR_SPACE. For UTF8ToHtml and the ISO-8859-X code, this will be
done in the following commits.
2024-07-01 18:05:40 +02:00

638 lines
15 KiB
C

/*
* testparser.c: Additional parser tests
*
* See Copyright for the status of this software.
*/
#include "libxml.h"
#include <libxml/parser.h>
#include <libxml/uri.h>
#include <libxml/xmlreader.h>
#include <libxml/xmlwriter.h>
#include <libxml/HTMLparser.h>
#include <string.h>
static int
testStandaloneWithEncoding(void) {
xmlDocPtr doc;
const char *str =
"<?xml version=\"1.0\" standalone=\"yes\"?>\n"
"<doc></doc>\n";
int err = 0;
xmlResetLastError();
doc = xmlReadDoc(BAD_CAST str, NULL, "UTF-8", 0);
if (doc == NULL) {
fprintf(stderr, "xmlReadDoc failed\n");
err = 1;
}
xmlFreeDoc(doc);
return err;
}
static int
testUnsupportedEncoding(void) {
xmlDocPtr doc;
const xmlError *error;
int err = 0;
xmlResetLastError();
doc = xmlReadDoc(BAD_CAST "<doc/>", NULL, "#unsupported",
XML_PARSE_NOWARNING);
if (doc == NULL) {
fprintf(stderr, "xmlReadDoc failed with unsupported encoding\n");
err = 1;
}
xmlFreeDoc(doc);
error = xmlGetLastError();
if (error == NULL ||
error->code != XML_ERR_UNSUPPORTED_ENCODING ||
error->level != XML_ERR_WARNING ||
strcmp(error->message, "Unsupported encoding: #unsupported\n") != 0)
{
fprintf(stderr, "xmlReadDoc failed to raise correct error\n");
err = 1;
}
return err;
}
static int
testNodeGetContent(void) {
xmlDocPtr doc;
xmlChar *content;
int err = 0;
doc = xmlReadDoc(BAD_CAST "<doc/>", NULL, NULL, 0);
xmlAddChild(doc->children, xmlNewReference(doc, BAD_CAST "lt"));
content = xmlNodeGetContent((xmlNodePtr) doc);
if (strcmp((char *) content, "<") != 0) {
fprintf(stderr, "xmlNodeGetContent failed\n");
err = 1;
}
xmlFree(content);
xmlFreeDoc(doc);
return err;
}
#ifdef LIBXML_SAX1_ENABLED
static int
testBalancedChunk(void) {
xmlNodePtr list;
xmlNodePtr elem;
int ret;
int err = 0;
ret = xmlParseBalancedChunkMemory(NULL, NULL, NULL, 0,
BAD_CAST "start <node xml:lang='en'>abc</node> end", &list);
if ((ret != XML_ERR_OK) ||
(list == NULL) ||
((elem = list->next) == NULL) ||
(elem->type != XML_ELEMENT_NODE) ||
(elem->nsDef == NULL) ||
(!xmlStrEqual(elem->nsDef->href, XML_XML_NAMESPACE))) {
fprintf(stderr, "xmlParseBalancedChunkMemory failed\n");
err = 1;
}
xmlFreeNodeList(list);
return(err);
}
#endif
#ifdef LIBXML_PUSH_ENABLED
static int
testHugePush(void) {
xmlParserCtxtPtr ctxt;
int err, i;
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
/*
* Push parse a document larger than XML_MAX_LOOKUP_LIMIT
* (10,000,000 bytes). This mainly tests whether shrinking the
* buffer works when push parsing.
*/
xmlParseChunk(ctxt, "<doc>", 5, 0);
for (i = 0; i < 1000000; i++)
xmlParseChunk(ctxt, "<elem>text</elem>", 17, 0);
xmlParseChunk(ctxt, "</doc>", 6, 1);
err = ctxt->wellFormed ? 0 : 1;
xmlFreeDoc(ctxt->myDoc);
xmlFreeParserCtxt(ctxt);
return err;
}
static int
testHugeEncodedChunk(void) {
xmlBufferPtr buf;
xmlChar *chunk;
xmlParserCtxtPtr ctxt;
int err, i;
/*
* Test the push parser with a built-in encoding handler like ISO-8859-1
* and a chunk larger than the initial decoded buffer (currently 4 KB).
*/
buf = xmlBufferCreate();
xmlBufferCat(buf,
BAD_CAST "<?xml version='1.0' encoding='ISO-8859-1'?>\n");
xmlBufferCat(buf, BAD_CAST "<doc><!-- ");
for (i = 0; i < 2000; i++)
xmlBufferCat(buf, BAD_CAST "0123456789");
xmlBufferCat(buf, BAD_CAST " --></doc>");
chunk = xmlBufferDetach(buf);
xmlBufferFree(buf);
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
xmlParseChunk(ctxt, (char *) chunk, xmlStrlen(chunk), 0);
xmlParseChunk(ctxt, NULL, 0, 1);
err = ctxt->wellFormed ? 0 : 1;
xmlFreeDoc(ctxt->myDoc);
xmlFreeParserCtxt(ctxt);
xmlFree(chunk);
return err;
}
#ifdef LIBXML_HTML_ENABLED
static int
testHtmlPushWithEncoding(void) {
htmlParserCtxtPtr ctxt;
htmlDocPtr doc;
htmlNodePtr node;
int err = 0;
ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
XML_CHAR_ENCODING_UTF8);
htmlParseChunk(ctxt, "-\xC3\xA4-", 4, 1);
doc = ctxt->myDoc;
if (!xmlStrEqual(doc->encoding, BAD_CAST "UTF-8")) {
fprintf(stderr, "testHtmlPushWithEncoding failed\n");
err = 1;
}
node = xmlDocGetRootElement(doc)->children->children->children;
if (!xmlStrEqual(node->content, BAD_CAST "-\xC3\xA4-")) {
fprintf(stderr, "testHtmlPushWithEncoding failed\n");
err = 1;
}
xmlFreeDoc(doc);
htmlFreeParserCtxt(ctxt);
return err;
}
#endif
#endif
#ifdef LIBXML_READER_ENABLED
static int
testReaderEncoding(void) {
xmlBuffer *buf;
xmlTextReader *reader;
xmlChar *xml;
const xmlChar *encoding;
int err = 0;
int i;
buf = xmlBufferCreate();
xmlBufferCCat(buf, "<?xml version='1.0' encoding='ISO-8859-1'?>\n");
xmlBufferCCat(buf, "<doc>");
for (i = 0; i < 8192; i++)
xmlBufferCCat(buf, "x");
xmlBufferCCat(buf, "</doc>");
xml = xmlBufferDetach(buf);
xmlBufferFree(buf);
reader = xmlReaderForDoc(BAD_CAST xml, NULL, NULL, 0);
xmlTextReaderRead(reader);
encoding = xmlTextReaderConstEncoding(reader);
if (!xmlStrEqual(encoding, BAD_CAST "ISO-8859-1")) {
fprintf(stderr, "testReaderEncoding failed\n");
err = 1;
}
xmlFreeTextReader(reader);
xmlFree(xml);
return err;
}
static int
testReaderContent(void) {
xmlTextReader *reader;
const xmlChar *xml = BAD_CAST "<d>x<e>y</e><f>z</f></d>";
xmlChar *string;
int err = 0;
reader = xmlReaderForDoc(xml, NULL, NULL, 0);
xmlTextReaderRead(reader);
string = xmlTextReaderReadOuterXml(reader);
if (!xmlStrEqual(string, xml)) {
fprintf(stderr, "xmlTextReaderReadOuterXml failed\n");
err = 1;
}
xmlFree(string);
string = xmlTextReaderReadInnerXml(reader);
if (!xmlStrEqual(string, BAD_CAST "x<e>y</e><f>z</f>")) {
fprintf(stderr, "xmlTextReaderReadInnerXml failed\n");
err = 1;
}
xmlFree(string);
string = xmlTextReaderReadString(reader);
if (!xmlStrEqual(string, BAD_CAST "xyz")) {
fprintf(stderr, "xmlTextReaderReadString failed\n");
err = 1;
}
xmlFree(string);
xmlFreeTextReader(reader);
return err;
}
#ifdef LIBXML_XINCLUDE_ENABLED
typedef struct {
char *message;
int code;
} testReaderErrorCtxt;
static void
testReaderError(void *arg, const char *msg,
xmlParserSeverities severity ATTRIBUTE_UNUSED,
xmlTextReaderLocatorPtr locator ATTRIBUTE_UNUSED) {
testReaderErrorCtxt *ctxt = arg;
if (ctxt->message != NULL)
xmlFree(ctxt->message);
ctxt->message = xmlMemStrdup(msg);
}
static void
testStructuredReaderError(void *arg, const xmlError *error) {
testReaderErrorCtxt *ctxt = arg;
if (ctxt->message != NULL)
xmlFree(ctxt->message);
ctxt->message = xmlMemStrdup(error->message);
ctxt->code = error->code;
}
static int
testReaderXIncludeError(void) {
/*
* Test whether XInclude errors are reported to the custom error
* handler of a reader.
*/
const char *doc =
"<doc xmlns:xi='http://www.w3.org/2001/XInclude'>\n"
" <xi:include/>\n"
"</doc>\n";
xmlTextReader *reader;
testReaderErrorCtxt errorCtxt;
int err = 0;
reader = xmlReaderForDoc(BAD_CAST doc, NULL, NULL, XML_PARSE_XINCLUDE);
xmlTextReaderSetErrorHandler(reader, testReaderError, &errorCtxt);
errorCtxt.message = NULL;
errorCtxt.code = 0;
while (xmlTextReaderRead(reader) > 0)
;
if (errorCtxt.message == NULL ||
strstr(errorCtxt.message, "href or xpointer") == NULL) {
fprintf(stderr, "xmlTextReaderSetErrorHandler failed\n");
err = 1;
}
xmlFree(errorCtxt.message);
xmlFreeTextReader(reader);
reader = xmlReaderForDoc(BAD_CAST doc, NULL, NULL, XML_PARSE_XINCLUDE);
xmlTextReaderSetStructuredErrorHandler(reader, testStructuredReaderError,
&errorCtxt);
errorCtxt.message = NULL;
errorCtxt.code = 0;
while (xmlTextReaderRead(reader) > 0)
;
if (errorCtxt.code != XML_XINCLUDE_NO_HREF ||
errorCtxt.message == NULL ||
strstr(errorCtxt.message, "href or xpointer") == NULL) {
fprintf(stderr, "xmlTextReaderSetStructuredErrorHandler failed\n");
err = 1;
}
xmlFree(errorCtxt.message);
xmlFreeTextReader(reader);
return err;
}
#endif
#endif
#ifdef LIBXML_WRITER_ENABLED
static int
testWriterIOWrite(void *ctxt, const char *data, int len) {
(void) ctxt;
(void) data;
return len;
}
static int
testWriterIOClose(void *ctxt) {
(void) ctxt;
return XML_IO_ENAMETOOLONG;
}
static int
testWriterClose(void){
xmlOutputBufferPtr out;
xmlTextWriterPtr writer;
int err = 0;
int result;
out = xmlOutputBufferCreateIO(testWriterIOWrite, testWriterIOClose,
NULL, NULL);
writer = xmlNewTextWriter(out);
xmlTextWriterStartDocument(writer, "1.0", "UTF-8", NULL);
xmlTextWriterStartElement(writer, BAD_CAST "elem");
xmlTextWriterEndElement(writer);
xmlTextWriterEndDocument(writer);
result = xmlTextWriterClose(writer);
if (result != XML_IO_ENAMETOOLONG) {
fprintf(stderr, "xmlTextWriterClose reported wrong error %d\n",
result);
err = 1;
}
xmlFreeTextWriter(writer);
return err;
}
#endif
typedef struct {
const char *uri;
const char *base;
const char *result;
} xmlRelativeUriTest;
static int
testBuildRelativeUri(void) {
xmlChar *res;
int err = 0;
int i;
static const xmlRelativeUriTest tests[] = {
{
"/a/b1/c1",
"/a/b2/c2",
"../b1/c1"
}, {
"a/b1/c1",
"a/b2/c2",
"../b1/c1"
}, {
"a/././b1/x/y/../z/../.././c1",
"./a/./b2/././b2",
"../b1/c1"
}, {
"file:///a/b1/c1",
"/a/b2/c2",
NULL
}, {
"/a/b1/c1",
"file:///a/b2/c2",
NULL
}, {
"a/b1/c1",
"/a/b2/c2",
NULL
}, {
"/a/b1/c1",
"a/b2/c2",
NULL
}, {
"http://example.org/a/b1/c1",
"http://example.org/a/b2/c2",
"../b1/c1"
}, {
"http://example.org/a/b1/c1",
"https://example.org/a/b2/c2",
NULL
}, {
"http://example.org/a/b1/c1",
"http://localhost/a/b2/c2",
NULL
}, {
"with space/x x/y y",
"with space/b2/c2",
"../x%20x/y%20y"
}, {
"with space/x x/y y",
"/b2/c2",
"with%20space/x%20x/y%20y"
}
#if defined(_WIN32) || defined(__CYGWIN__)
, {
"\\a\\b1\\c1",
"\\a\\b2\\c2",
"../b1/c1"
}, {
"\\a\\b1\\c1",
"/a/b2/c2",
"../b1/c1"
}, {
"a\\b1\\c1",
"a/b2/c2",
"../b1/c1"
}, {
"file://server/a/b1/c1",
"\\\\?\\UNC\\server\\a\\b2\\c2",
"../b1/c1"
}, {
"file://server/a/b1/c1",
"\\\\server\\a\\b2\\c2",
"../b1/c1"
}, {
"file:///x:/a/b1/c1",
"x:\\a\\b2\\c2",
"../b1/c1"
}, {
"file:///x:/a/b1/c1",
"\\\\?\\x:\\a\\b2\\c2",
"../b1/c1"
}, {
"file:///x:/a/b1/c1",
"file:///y:/a/b2/c2",
NULL
}, {
"x:/a/b1/c1",
"y:/a/b2/c2",
"file:///x:/a/b1/c1"
}, {
"/a/b1/c1",
"y:/a/b2/c2",
NULL
}, {
"\\\\server\\a\\b1\\c1",
"a/b2/c2",
"file://server/a/b1/c1"
}
#endif
};
for (i = 0; (size_t) i < sizeof(tests) / sizeof(tests[0]); i++) {
const xmlRelativeUriTest *test = tests + i;
const char *expect;
res = xmlBuildRelativeURI(BAD_CAST test->uri, BAD_CAST test->base);
expect = test->result ? test->result : test->uri;
if (!xmlStrEqual(res, BAD_CAST expect)) {
fprintf(stderr, "xmlBuildRelativeURI failed uri=%s base=%s "
"result=%s expected=%s\n", test->uri, test->base,
res, expect);
err = 1;
}
xmlFree(res);
}
return err;
}
static int charEncConvImplError;
static int
rot13Convert(unsigned char *out, int *outlen,
const unsigned char *in, int *inlen, void *vctxt) {
int *ctxt = vctxt;
int inSize = *inlen;
int outSize = *outlen;
int rot, i;
rot = *ctxt;
for (i = 0; i < inSize && i < outSize; i++) {
int c = in[i];
if (c >= 'A' && c <= 'Z')
c = 'A' + (c - 'A' + rot) % 26;
else if (c >= 'a' && c <= 'z')
c = 'a' + (c - 'a' + rot) % 26;
out[i] = c;
}
*inlen = i;
*outlen = i;
return XML_ENC_ERR_SUCCESS;
}
static void
rot13ConvCtxtDtor(void *vctxt) {
xmlFree(vctxt);
}
static int
rot13ConvImpl(void *vctxt ATTRIBUTE_UNUSED, const char *name,
xmlCharEncConverter *conv) {
int *inputCtxt;
if (strcmp(name, "rot13") != 0) {
fprintf(stderr, "rot13ConvImpl received wrong name\n");
charEncConvImplError = 1;
return XML_ERR_UNSUPPORTED_ENCODING;
}
conv->input = rot13Convert;
conv->output = rot13Convert;
conv->ctxtDtor = rot13ConvCtxtDtor;
inputCtxt = xmlMalloc(sizeof(*inputCtxt));
*inputCtxt = 13;
conv->inputCtxt = inputCtxt;
return XML_ERR_OK;
}
static int
testCharEncConvImpl(void) {
xmlParserCtxtPtr ctxt;
xmlDocPtr doc;
xmlNodePtr root;
int err = 0;
ctxt = xmlNewParserCtxt();
xmlCtxtSetCharEncConvImpl(ctxt, rot13ConvImpl, NULL);
charEncConvImplError = 0;
doc = xmlCtxtReadDoc(ctxt, BAD_CAST "<?kzy irefvba='1.0'?><qbp/>", NULL,
"rot13", 0);
if (charEncConvImplError)
err = 1;
xmlFreeParserCtxt(ctxt);
root = xmlDocGetRootElement(doc);
if (root == NULL || strcmp((char *) root->name, "doc") != 0) {
fprintf(stderr, "testCharEncConvImpl failed\n");
err = 1;
}
xmlFreeDoc(doc);
return err;
}
int
main(void) {
int err = 0;
err |= testStandaloneWithEncoding();
err |= testUnsupportedEncoding();
err |= testNodeGetContent();
#ifdef LIBXML_SAX1_ENABLED
err |= testBalancedChunk();
#endif
#ifdef LIBXML_PUSH_ENABLED
err |= testHugePush();
err |= testHugeEncodedChunk();
#ifdef LIBXML_HTML_ENABLED
err |= testHtmlPushWithEncoding();
#endif
#endif
#ifdef LIBXML_READER_ENABLED
err |= testReaderEncoding();
err |= testReaderContent();
#ifdef LIBXML_XINCLUDE_ENABLED
err |= testReaderXIncludeError();
#endif
#endif
#ifdef LIBXML_WRITER_ENABLED
err |= testWriterClose();
#endif
err |= testBuildRelativeUri();
err |= testCharEncConvImpl();
return err;
}