diff --git a/ChangeLog b/ChangeLog index 41651f03..13df08f4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sat Jul 22 05:59:05 CEST 2000 Daniel Veillard + + * parser.c xmlIO.[ch]: fixed the problem of encoding support + when using in memory parsing. Need some cleanup. + * xmllint.c configure.in: added a --memory flag to test memory + parsing + Fri Jul 21 17:09:57 CEST 2000 Daniel Veillard * nanohttp.c: fixed socklen_t replacement to unsigned int diff --git a/config.h.in b/config.h.in index da75401c..7a7587a9 100644 --- a/config.h.in +++ b/config.h.in @@ -99,6 +99,9 @@ /* Define if you have the <sys/dir.h> header file. */ #undef HAVE_SYS_DIR_H +/* Define if you have the <sys/mman.h> header file. */ +#undef HAVE_SYS_MMAN_H + /* Define if you have the <sys/ndir.h> header file. */ #undef HAVE_SYS_NDIR_H diff --git a/configure.in b/configure.in index 0ae4e05d..51178c81 100644 --- a/configure.in +++ b/configure.in @@ -71,7 +71,7 @@ AC_CHECK_HEADERS(fcntl.h unistd.h ctype.h dirent.h errno.h malloc.h) AC_CHECK_HEADERS(stdarg.h sys/stat.h sys/types.h time.h) AC_CHECK_HEADERS(ieeefp.h nan.h math.h fp_class.h float.h) AC_CHECK_HEADERS(stdlib.h sys/socket.h netinet/in.h arpa/inet.h) -AC_CHECK_HEADERS(netdb.h sys/time.h sys/select.h) +AC_CHECK_HEADERS(netdb.h sys/time.h sys/select.h sys/mman.h) dnl Specific dir for HTML output ? 
if test "x$with_html_dir" = "x" ; then diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h index 5289367e..b966c4a7 100644 --- a/include/libxml/xmlIO.h +++ b/include/libxml/xmlIO.h @@ -83,6 +83,9 @@ xmlParserInputBufferPtr xmlParserInputBufferPtr xmlParserInputBufferCreateFd (int fd, xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateMem (const char *mem, int size, + xmlCharEncoding enc); xmlParserInputBufferPtr xmlParserInputBufferCreateIO (xmlInputReadCallback ioread, xmlInputCloseCallback ioclose, diff --git a/parser.c b/parser.c index 3e7834dc..43a8a40d 100644 --- a/parser.c +++ b/parser.c @@ -2555,7 +2555,7 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) } return(0); } else { - if (ctxt->input->length == 0) { + if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) { /* * When parsing a static memory array one must know the * size to be able to convert the buffer. @@ -2572,9 +2572,10 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) * Move it as the raw buffer and create a new input buffer */ processed = ctxt->input->cur - ctxt->input->base; + ctxt->input->buf->raw = xmlBufferCreate(); xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur, - ctxt->input->length - processed); + ctxt->input->length - processed); ctxt->input->buf->buffer = xmlBufferCreate(); /* @@ -10575,6 +10576,7 @@ xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer, int size) { xmlParserCtxtPtr ctxt; xmlParserInputPtr input; + xmlParserInputBufferPtr buf; if (buffer[size] != 0) return(NULL); @@ -10583,6 +10585,9 @@ xmlCreateMemoryParserCtxt(char *buffer, int size) { if (ctxt == NULL) return(NULL); + buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE); + if (buf == NULL) return(NULL); + input = xmlNewInputStream(ctxt); if (input == NULL) { xmlFreeParserCtxt(ctxt); @@ -10590,14 +10595,9 @@ xmlCreateMemoryParserCtxt(char *buffer, int size) { } input->filename = NULL; - 
input->line = 1; - input->col = 1; - input->buf = NULL; - input->consumed = 0; - - input->base = BAD_CAST buffer; - input->cur = BAD_CAST buffer; - input->free = NULL; + input->buf = buf; + input->base = input->buf->buffer->content; + input->cur = input->buf->buffer->content; inputPush(ctxt, input); return(ctxt); diff --git a/xmlIO.c b/xmlIO.c index 531e3aa3..31729173 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -89,6 +89,11 @@ int xmlOutputCallbackInitialized = 0; * * ************************************************************************/ +int +xmlNop(void) { + return(0); +} + /** * xmlFdMatch: * @filename: the URI for matching @@ -1044,6 +1049,35 @@ xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) { return(ret); } +/** + * xmlParserInputBufferCreateMem: + * @mem: the memory input + * @size: the length of the memory block + * @enc: the charset encoding if known + * + * Create a buffered parser input for the progressive parsing for the input + * from a memory block + * + * Returns the new parser input or NULL + */ +xmlParserInputBufferPtr +xmlParserInputBufferCreateMem(const char *mem, int size, xmlCharEncoding enc) { + xmlParserInputBufferPtr ret; + + if (size <= 0) return(NULL); + if (mem == NULL) return(NULL); + + ret = xmlAllocParserInputBuffer(enc); + if (ret != NULL) { + ret->context = (void *) mem; + ret->readcallback = (xmlInputReadCallback) xmlNop; + ret->closecallback = NULL; + xmlBufferAdd(ret->buffer, (const xmlChar *) mem, size); + } + + return(ret); +} + /** * xmlOutputBufferCreateFd: * @fd: a file descriptor number diff --git a/xmlIO.h b/xmlIO.h index 5289367e..b966c4a7 100644 --- a/xmlIO.h +++ b/xmlIO.h @@ -83,6 +83,9 @@ xmlParserInputBufferPtr xmlParserInputBufferPtr xmlParserInputBufferCreateFd (int fd, xmlCharEncoding enc); +xmlParserInputBufferPtr + xmlParserInputBufferCreateMem (const char *mem, int size, + xmlCharEncoding enc); xmlParserInputBufferPtr xmlParserInputBufferCreateIO (xmlInputReadCallback ioread, xmlInputCloseCallback 
ioclose, diff --git a/xmllint.c b/xmllint.c index 3540e7ef..18805f72 100644 --- a/xmllint.c +++ b/xmllint.c @@ -29,6 +29,9 @@ #ifdef HAVE_UNISTD_H #include #endif +#ifdef HAVE_SYS_MMAN_H +#include +#endif #ifdef HAVE_STDLIB_H #include #endif @@ -66,6 +69,9 @@ static int compress = 0; static int html = 0; static int htmlout = 0; static int push = 0; +#ifdef HAVE_SYS_MMAN_H +static int memory = 0; +#endif static int noblanks = 0; static int testIO = 0; static char *encoding = NULL; @@ -445,6 +451,22 @@ void parseAndPrintFile(char *filename) { xmlFreeDoc(doc); doc = NULL; } +#ifdef HAVE_SYS_MMAN_H + } else if (memory) { + int fd; + struct stat info; + const char *base; + if (stat(filename, &info) < 0) + return; + if ((fd = open(filename, O_RDONLY)) < 0) + return; + base = mmap(NULL, info.st_size, PROT_READ, MAP_SHARED, fd, 0) ; + if (base == MAP_FAILED) + return; + + doc = xmlParseMemory((char *) base, info.st_size); + munmap((char *) base, info.st_size); +#endif } else doc = xmlParseFile(filename); #ifdef LIBXML_HTML_ENABLED @@ -585,6 +607,11 @@ int main(int argc, char **argv) { else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push"))) push++; +#ifdef HAVE_SYS_MMAN_H + else if ((!strcmp(argv[i], "-memory")) || + (!strcmp(argv[i], "--memory"))) + memory++; +#endif else if ((!strcmp(argv[i], "-testIO")) || (!strcmp(argv[i], "--testIO"))) testIO++; @@ -663,6 +690,9 @@ int main(int argc, char **argv) { printf("\t--html : use the HTML parser\n"); #endif printf("\t--push : use the push mode of the parser\n"); +#ifdef HAVE_SYS_MMAN_H + printf("\t--memory : parse from memory\n"); +#endif printf("\t--nowarning : do not emit warnings from parser/validator\n"); printf("\t--noblanks : drop (ignorable?) blanks spaces\n"); printf("\t--testIO : test user I/O support\n");