diff --git a/ChangeLog b/ChangeLog index 3c5716e4..2e84362f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Thu Jun 28 14:11:28 CEST 2001 Daniel Veillard + + * parser.c: fixed UTF8 BOM support in push mode + * test/utf8bom.xml result/utf8bom.xml result/noent/utf8bom.xml: + added a specific testcase + Wed Jun 27 18:33:13 CEST 2001 Daniel Veillard * Makefile.am: added --push regression tests diff --git a/parser.c b/parser.c index 9c76c8fc..a22eb4ea 100644 --- a/parser.c +++ b/parser.c @@ -7632,11 +7632,31 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { */ goto done; case XML_PARSER_START: - /* - * Very first chars read from the document flow. - */ - if (avail < 2) - goto done; + if (ctxt->charset == XML_CHAR_ENCODING_NONE) { + xmlChar start[4]; + xmlCharEncoding enc; + + /* + * Very first chars read from the document flow. + */ + if (avail < 4) + goto done; + + /* + * Get the 4 first bytes and decode the charset + * if enc != XML_CHAR_ENCODING_NONE + * plug some encoding conversion routines. + */ + start[0] = RAW; + start[1] = NXT(1); + start[2] = NXT(2); + start[3] = NXT(3); + enc = xmlDetectCharEncoding(start, 4); + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + break; + } cur = ctxt->input->cur[0]; next = ctxt->input->cur[1]; @@ -8509,9 +8529,6 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, inputStream->cur = inputStream->buf->buffer->content; inputStream->end = &inputStream->buf->buffer->content[inputStream->buf->buffer->use]; - if (enc != XML_CHAR_ENCODING_NONE) { - xmlSwitchEncoding(ctxt, enc); - } inputPush(ctxt, inputStream); @@ -8523,6 +8540,10 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, #endif } + if (enc != XML_CHAR_ENCODING_NONE) { + xmlSwitchEncoding(ctxt, enc); + } + return(ctxt); } diff --git a/result/noent/utf8bom.xml b/result/noent/utf8bom.xml new file mode 100644 index 00000000..f4e51640 --- /dev/null +++ b/result/noent/utf8bom.xml @@ -0,0 +1,2 @@ + + diff --git a/result/utf8bom.xml b/result/utf8bom.xml new file mode 100644 index 00000000..f4e51640 --- /dev/null +++ b/result/utf8bom.xml @@ -0,0 +1,2 @@ + + diff --git a/test/utf8bom.xml b/test/utf8bom.xml new file mode 100644 index 00000000..b4cdff0b --- /dev/null +++ b/test/utf8bom.xml @@ -0,0 +1 @@ +