xmllint: Improve --memory and --testIO options

Support --memory and --testIO in SAX mode.

Keep memory-mapped file across repetitions.

Options `--sax --memory --noout --repeat` can now be used to benchmark
the core parser without building a DOM tree or repeatedly reading files
from disk.
This commit is contained in:
Nick Wellnhofer 2024-09-27 23:49:02 +02:00
parent 3ac214f01e
commit a4c16a140c

309
xmllint.c
View File

@ -142,6 +142,8 @@ static const int pushsize = 4096;
#endif /* LIBXML_PUSH_ENABLED */ #endif /* LIBXML_PUSH_ENABLED */
#if HAVE_DECL_MMAP #if HAVE_DECL_MMAP
static int memory = 0; static int memory = 0;
static char *memoryData;
static size_t memorySize;
#endif #endif
static int testIO = 0; static int testIO = 0;
#ifdef LIBXML_XINCLUDE_ENABLED #ifdef LIBXML_XINCLUDE_ENABLED
@ -270,6 +272,105 @@ xmllintResourceLoader(void *ctxt ATTRIBUTE_UNUSED, const char *URL,
return(XML_IO_ENOENT); return(XML_IO_ENOENT);
} }
/************************************************************************
* *
* Core parsing functions *
* *
************************************************************************/
/*
 * Read callback used for the --testIO code path.
 *
 * f:    the stdio stream to read from (a FILE * passed as opaque context)
 * buf:  destination buffer
 * len:  maximum number of bytes to store in buf
 *
 * Returns the number of bytes read, 0 at end of file, or -1 if the
 * request is invalid or the stream reports a read error.
 */
static int
myRead(void *f, char *buf, int len) {
    FILE *stream = (FILE *) f;
    size_t nread;

    /* A negative length would turn into a huge size_t inside fread. */
    if (len < 0)
        return(-1);

    nread = fread(buf, 1, (size_t) len, stream);

    /*
     * fread returns 0 for both end-of-file and failure. Only a set error
     * indicator distinguishes them, so report that case instead of
     * letting it look like a clean end of input.
     */
    if ((nread == 0) && ferror(stream))
        return(-1);

    return((int) nread);
}
/*
 * Close callback used for the --testIO code path.
 *
 * context:  the stdio stream (a FILE * passed as opaque context)
 *
 * Standard input is left open and reported as success; any other stream
 * is closed and the fclose result is returned.
 */
static int
myClose(void *context) {
    FILE *stream = (FILE *) context;

    return((stream != stdin) ? fclose(stream) : 0);
}
/*
 * Parse one XML document with the given parser context.
 *
 * ctxt:      parser context to configure and parse with
 * filename:  document name; "-" selects standard input on the stdio and
 *            file-descriptor paths
 *
 * The input source follows the global settings: the buffer described by
 * memoryData/memorySize when `memory` is set, the myRead/myClose stdio
 * callbacks when `testIO` is set, and the regular fd/file readers
 * otherwise.
 *
 * Returns the value of the selected parse call. Returns NULL and updates
 * progresult if the input could not be created or opened.
 */
static xmlDocPtr
parseXml(xmlParserCtxtPtr ctxt, const char *filename) {
    xmlDocPtr doc;

    xmlCtxtSetResourceLoader(ctxt, xmllintResourceLoader, NULL);
    if (maxAmpl > 0)
        xmlCtxtSetMaxAmplification(ctxt, maxAmpl);

#if HAVE_DECL_MMAP
    if (memory) {
        xmlParserInputPtr input;

        /*
         * xmlCtxtParseDocument has no options argument, unlike the
         * xmlCtxtRead* calls below. Apply the options here so that
         * --memory honours the same parser flags as the other paths.
         */
        xmlCtxtUseOptions(ctxt, options);

        /*
         * NOTE(review): XML_INPUT_BUF_ZERO_TERMINATED presumes that
         * memoryData[memorySize] is a readable zero byte - confirm that
         * the code setting up the buffer guarantees this.
         */
        input = xmlNewInputFromMemory(filename, memoryData, memorySize,
                                      XML_INPUT_BUF_STATIC |
                                      XML_INPUT_BUF_ZERO_TERMINATED);
        if (input == NULL) {
            progresult = XMLLINT_ERR_MEM;
            return(NULL);
        }
        doc = xmlCtxtParseDocument(ctxt, input);
        return(doc);
    }
#endif

    if (testIO) {
        FILE *f;

        if (strcmp(filename, "-") == 0) {
            f = stdin;
        } else {
            f = fopen(filename, "rb");
            if (f == NULL) {
                fprintf(ERR_STREAM, "Can't open %s\n", filename);
                progresult = XMLLINT_ERR_RDFILE;
                return(NULL);
            }
        }

        /* The stream is handed to the parser along with myClose. */
        doc = xmlCtxtReadIO(ctxt, myRead, myClose, f, filename, NULL,
                            options);
    } else {
        if (strcmp(filename, "-") == 0)
            doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
        else
            doc = xmlCtxtReadFile(ctxt, filename, NULL, options);
    }

    return(doc);
}
#ifdef LIBXML_HTML_ENABLED
/*
 * Parse one HTML document with the given parser context.
 *
 * ctxt:      HTML parser context to parse with
 * filename:  document name; "-" selects standard input on the
 *            file-descriptor path
 *
 * When `memory` is set the document is taken from the buffer described
 * by memoryData/memorySize, otherwise from standard input or the named
 * file.
 *
 * Returns the value of the selected parse call. Returns NULL and sets
 * progresult to XMLLINT_ERR_MEM if the memory input could not be created.
 */
static xmlDocPtr
parseHtml(htmlParserCtxtPtr ctxt, const char *filename) {
    xmlDocPtr doc;

#if HAVE_DECL_MMAP
    if (memory) {
        xmlParserInputPtr input;

        /*
         * htmlCtxtParseDocument has no options argument, unlike the
         * htmlCtxtRead* calls below. Apply the options here so that
         * --memory honours the same parser flags as the other paths.
         */
        htmlCtxtUseOptions(ctxt, options);

        /*
         * NOTE(review): XML_INPUT_BUF_ZERO_TERMINATED presumes that
         * memoryData[memorySize] is a readable zero byte - confirm that
         * the code setting up the buffer guarantees this.
         */
        input = xmlNewInputFromMemory(filename, memoryData, memorySize,
                                      XML_INPUT_BUF_STATIC |
                                      XML_INPUT_BUF_ZERO_TERMINATED);
        if (input == NULL) {
            progresult = XMLLINT_ERR_MEM;
            return(NULL);
        }
        doc = htmlCtxtParseDocument(ctxt, input);
        return(doc);
    }
#endif

    if (strcmp(filename, "-") == 0)
        doc = htmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
    else
        doc = htmlCtxtReadFile(ctxt, filename, NULL, options);

    return(doc);
}
#endif /* LIBXML_HTML_ENABLED */
/************************************************************************ /************************************************************************
* * * *
* Memory allocation consumption debugging * * Memory allocation consumption debugging *
@ -496,22 +597,6 @@ xmlHTMLError(void *vctxt, const xmlError *error)
xmlSetGenericErrorFunc(oldErrorCtxt, oldError); xmlSetGenericErrorFunc(oldErrorCtxt, oldError);
} }
/************************************************************************
* *
* I/O Interfaces *
* *
************************************************************************/
/*
 * Read callback for stdio-backed parser input.
 *
 * f:    the stdio stream to read from (a FILE * passed as opaque context)
 * buf:  destination buffer
 * len:  maximum number of bytes to store in buf
 *
 * Returns the number of bytes read, 0 at end of file, or -1 if the
 * request is invalid or the stream reports a read error.
 */
static int myRead(void *f, char *buf, int len) {
    FILE *stream = (FILE *) f;
    size_t nread;

    /* A negative length would turn into a huge size_t inside fread. */
    if (len < 0)
        return(-1);

    nread = fread(buf, 1, (size_t) len, stream);

    /*
     * fread returns 0 for both end-of-file and failure; consult the
     * error indicator so a failed read is not mistaken for end of input.
     */
    if ((nread == 0) && ferror(stream))
        return(-1);

    return((int) nread);
}
/*
 * Close callback for stdio-backed parser input.
 *
 * context:  the stdio stream (a FILE * passed as opaque context)
 *
 * Standard input is never closed and yields 0; every other stream is
 * closed and the fclose result is passed back.
 */
static int myClose(void *context) {
    FILE *stream = (FILE *) context;
    int status = 0;

    if (stream != stdin)
        status = fclose(stream);
    return(status);
}
/************************************************************************ /************************************************************************
* * * *
* SAX based tests * * SAX based tests *
@ -1374,10 +1459,7 @@ testSAX(const char *filename) {
return; return;
} }
if (strcmp(filename, "-") == 0) parseHtml(ctxt, filename);
htmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
else
htmlCtxtReadFile(ctxt, filename, NULL, options);
htmlFreeParserCtxt(ctxt); htmlFreeParserCtxt(ctxt);
} else } else
@ -1385,23 +1467,13 @@ testSAX(const char *filename) {
{ {
xmlParserCtxtPtr ctxt = NULL; xmlParserCtxtPtr ctxt = NULL;
/*
* Create the parser context and hook the input
*/
ctxt = xmlNewSAXParserCtxt(handler, (void *) user_data); ctxt = xmlNewSAXParserCtxt(handler, (void *) user_data);
if (ctxt == NULL) { if (ctxt == NULL) {
progresult = XMLLINT_ERR_MEM; progresult = XMLLINT_ERR_MEM;
return; return;
} }
xmlCtxtSetResourceLoader(ctxt, xmllintResourceLoader, NULL); parseXml(ctxt, filename);
if (maxAmpl > 0)
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
if (strcmp(filename, "-") == 0)
xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
else
xmlCtxtReadFile(ctxt, filename, NULL, options);
if (ctxt->myDoc != NULL) { if (ctxt->myDoc != NULL) {
fprintf(ERR_STREAM, "SAX generated a doc !\n"); fprintf(ERR_STREAM, "SAX generated a doc !\n");
@ -1506,25 +1578,8 @@ static void streamFile(const char *filename) {
xmlTextReaderPtr reader; xmlTextReaderPtr reader;
int ret; int ret;
#if HAVE_DECL_MMAP #if HAVE_DECL_MMAP
int fd = -1;
struct stat info;
const char *base = NULL;
if (memory) { if (memory) {
if (stat(filename, &info) < 0) reader = xmlReaderForMemory(memoryData, memorySize, filename,
return;
fd = open(filename, O_RDONLY);
if (fd < 0)
return;
base = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0) ;
if (base == (void *) MAP_FAILED) {
close(fd);
fprintf(ERR_STREAM, "mmap failure for file %s\n", filename);
progresult = XMLLINT_ERR_RDFILE;
return;
}
reader = xmlReaderForMemory(base, info.st_size, filename,
NULL, options); NULL, options);
} else } else
#endif #endif
@ -1654,12 +1709,6 @@ static void streamFile(const char *filename) {
patstream = NULL; patstream = NULL;
} }
#endif #endif
#if HAVE_DECL_MMAP
if (memory) {
munmap((char *) base, info.st_size);
close(fd);
}
#endif
} }
static void walkDoc(xmlDocPtr doc) { static void walkDoc(xmlDocPtr doc) {
@ -1930,45 +1979,10 @@ parseFile(const char *filename, xmlParserCtxtPtr rectxt) {
} }
#endif /* LIBXML_PUSH_ENABLED */ #endif /* LIBXML_PUSH_ENABLED */
#if HAVE_DECL_MMAP
if ((html) && (memory)) {
int fd;
struct stat info;
const char *base;
if (stat(filename, &info) < 0)
return(NULL);
fd = open(filename, O_RDONLY);
if (fd < 0)
return(NULL);
base = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0) ;
if (base == (void *) MAP_FAILED) {
close(fd);
fprintf(ERR_STREAM, "mmap failure for file %s\n", filename);
progresult = XMLLINT_ERR_RDFILE;
return(NULL);
}
doc = htmlReadMemory((char *) base, info.st_size, filename,
NULL, options);
munmap((char *) base, info.st_size);
close(fd);
return(doc);
}
#endif
if (html) { if (html) {
ctxt = htmlNewParserCtxt(); ctxt = htmlNewParserCtxt();
xmlCtxtSetResourceLoader(ctxt, xmllintResourceLoader, NULL); doc = parseHtml(ctxt, filename);
if (strcmp(filename, "-") == 0)
doc = htmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
else
doc = htmlCtxtReadFile(ctxt, filename, NULL, options);
htmlFreeParserCtxt(ctxt); htmlFreeParserCtxt(ctxt);
return(doc); return(doc);
} }
#endif /* LIBXML_HTML_ENABLED */ #endif /* LIBXML_HTML_ENABLED */
@ -2029,60 +2043,10 @@ parseFile(const char *filename, xmlParserCtxtPtr rectxt) {
ctxt = rectxt; ctxt = rectxt;
} }
xmlCtxtSetResourceLoader(ctxt, xmllintResourceLoader, NULL); doc = parseXml(ctxt, filename);
if (maxAmpl > 0)
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
if (htmlout) if (htmlout)
xmlCtxtSetErrorHandler(ctxt, xmlHTMLError, ctxt); xmlCtxtSetErrorHandler(ctxt, xmlHTMLError, ctxt);
if (testIO) {
FILE *f;
if ((filename[0] == '-') && (filename[1] == 0)) {
f = stdin;
} else {
f = fopen(filename, "rb");
if (f == NULL) {
fprintf(ERR_STREAM, "Can't open %s\n", filename);
progresult = XMLLINT_ERR_RDFILE;
goto error;
}
}
doc = xmlCtxtReadIO(ctxt, myRead, myClose, f, filename, NULL,
options);
#if HAVE_DECL_MMAP
} else if (memory) {
int fd;
struct stat info;
const char *base;
if (stat(filename, &info) < 0)
goto error;
fd = open(filename, O_RDONLY);
if (fd < 0)
goto error;
base = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0) ;
if (base == (void *) MAP_FAILED) {
close(fd);
fprintf(ERR_STREAM, "mmap failure for file %s\n", filename);
progresult = XMLLINT_ERR_RDFILE;
goto error;
}
doc = xmlCtxtReadMemory(ctxt, base, info.st_size, filename, NULL,
options);
munmap((char *) base, info.st_size);
close(fd);
#endif
} else {
if (strcmp(filename, "-") == 0)
doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options);
else
doc = xmlCtxtReadFile(ctxt, filename, NULL, options);
}
} }
if (doc == NULL) { if (doc == NULL) {
@ -2097,7 +2061,6 @@ parseFile(const char *filename, xmlParserCtxtPtr rectxt) {
#endif /* LIBXML_VALID_ENABLED */ #endif /* LIBXML_VALID_ENABLED */
} }
error:
if (ctxt != rectxt) if (ctxt != rectxt)
xmlFreeParserCtxt(ctxt); xmlFreeParserCtxt(ctxt);
@ -2910,6 +2873,8 @@ xmllintMain(int argc, const char **argv, xmlResourceLoader loader) {
#endif /* LIBXML_PUSH_ENABLED */ #endif /* LIBXML_PUSH_ENABLED */
#if HAVE_DECL_MMAP #if HAVE_DECL_MMAP
memory = 0; memory = 0;
memoryData = NULL;
memorySize = 0;
#endif #endif
testIO = 0; testIO = 0;
encoding = NULL; encoding = NULL;
@ -3426,11 +3391,42 @@ xmllintMain(int argc, const char **argv, xmlResourceLoader loader) {
} }
} }
#endif /* LIBXML_READER_ENABLED && LIBXML_PATTERN_ENABLED */ #endif /* LIBXML_READER_ENABLED && LIBXML_PATTERN_ENABLED */
for (i = 1; i < argc ; i++) { for (i = 1; i < argc ; i++) {
if ((argv[i][0] == '-') && (strcmp(argv[i], "-") != 0)) { const char *filename = argv[i];
i += skipArgs(argv[i]); #if HAVE_DECL_MMAP
int memoryFd = -1;
#endif
if ((filename[0] == '-') && (strcmp(filename, "-") != 0)) {
i += skipArgs(filename);
continue; continue;
} }
#if HAVE_DECL_MMAP
if (memory) {
struct stat info;
if (stat(filename, &info) < 0) {
progresult = XMLLINT_ERR_RDFILE;
break;
}
memoryFd = open(filename, O_RDONLY);
if (memoryFd < 0) {
progresult = XMLLINT_ERR_RDFILE;
break;
}
memoryData = mmap(NULL, info.st_size + 1, PROT_READ, MAP_SHARED,
memoryFd, 0);
if (memoryData == (void *) MAP_FAILED) {
close(memoryFd);
fprintf(ERR_STREAM, "mmap failure for file %s\n", filename);
progresult = XMLLINT_ERR_RDFILE;
break;
}
memorySize = info.st_size;
}
#endif /* HAVE_DECL_MMAP */
if ((timing) && (repeat)) if ((timing) && (repeat))
startTimer(); startTimer();
if (repeat) { if (repeat) {
@ -3442,20 +3438,16 @@ xmllintMain(int argc, const char **argv, xmlResourceLoader loader) {
goto error; goto error;
} }
xmlCtxtSetResourceLoader(ctxt, xmllintResourceLoader, NULL);
if (maxAmpl > 0)
xmlCtxtSetMaxAmplification(ctxt, maxAmpl);
for (acount = 0;acount < repeat;acount++) { for (acount = 0;acount < repeat;acount++) {
#ifdef LIBXML_READER_ENABLED #ifdef LIBXML_READER_ENABLED
if (stream != 0) { if (stream != 0) {
streamFile(argv[i]); streamFile(filename);
} else { } else {
#endif /* LIBXML_READER_ENABLED */ #endif /* LIBXML_READER_ENABLED */
if (sax) { if (sax) {
testSAX(argv[i]); testSAX(filename);
} else { } else {
parseAndPrintFile(argv[i], ctxt); parseAndPrintFile(filename, ctxt);
} }
#ifdef LIBXML_READER_ENABLED #ifdef LIBXML_READER_ENABLED
} }
@ -3466,19 +3458,26 @@ xmllintMain(int argc, const char **argv, xmlResourceLoader loader) {
} else { } else {
#ifdef LIBXML_READER_ENABLED #ifdef LIBXML_READER_ENABLED
if (stream != 0) if (stream != 0)
streamFile(argv[i]); streamFile(filename);
else else
#endif /* LIBXML_READER_ENABLED */ #endif /* LIBXML_READER_ENABLED */
if (sax) { if (sax) {
testSAX(argv[i]); testSAX(filename);
} else { } else {
parseAndPrintFile(argv[i], NULL); parseAndPrintFile(filename, NULL);
} }
} }
files ++; files ++;
if ((timing) && (repeat)) { if ((timing) && (repeat)) {
endTimer("%d iterations", repeat); endTimer("%d iterations", repeat);
} }
#if HAVE_DECL_MMAP
if (memory) {
munmap(memoryData, memorySize);
close(memoryFd);
}
#endif
} }
if (generate) if (generate)
parseAndPrintFile(NULL, NULL); parseAndPrintFile(NULL, NULL);