mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
parser: Fix regression when switching input encodings
Revert some changes from commit 98840d40. WebKit/Chromium can actually switch from ISO-8859-1 to UTF-16 in the middle of parsing. This is a bad idea, but we have to keep supporting this use case.
This commit is contained in:
parent
b4d46cee80
commit
a19fa11e1d
@@ -1177,12 +1177,20 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
||||
}
|
||||
|
||||
if (in->encoder != NULL) {
|
||||
if (in->encoder == handler)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* TODO: Detect encoding mismatch. We should start by comparing
|
||||
* in->encoder->name and handler->name, but there are a few
|
||||
* compatible encodings like UTF-16 and UCS-2 or UTF-32 and UCS-4.
|
||||
* Switching encodings during parsing is a really bad idea,
|
||||
* but WebKit/Chromium switches from ISO-8859-1 to UTF-16 as soon as
|
||||
* it finds Unicode characters with code points larger than 255.
|
||||
*
|
||||
* TODO: We should check whether the "raw" input buffer is empty and
|
||||
* convert the old content using the old encoder.
|
||||
*/
|
||||
xmlCharEncCloseFunc(handler);
|
||||
|
||||
xmlCharEncCloseFunc(in->encoder);
|
||||
in->encoder = handler;
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user