Heap-based buffer overread in htmlCurrentChar

For https://bugzilla.gnome.org/show_bug.cgi?id=758606

* parserInternals.c:
(xmlNextChar): Add a test to catch other issues on ctxt->input
corruption proactively.
For non-UTF-8 charsets, xmlNextChar() failed to check for the end
of the input buffer and would continue reading.  Fix this by
pulling out the check for the end of the input buffer into common
code, and returning if we reach the end of the input buffer
prematurely.
* result/HTML/758606.html: Added.
* result/HTML/758606.html.err: Added.
* result/HTML/758606.html.sax: Added.
* result/HTML/758606_2.html: Added.
* result/HTML/758606_2.html.err: Added.
* result/HTML/758606_2.html.sax: Added.
* test/HTML/758606.html: Added test case.
* test/HTML/758606_2.html: Added test case.
This commit is contained in:
Pranjal Jumde 2016-03-01 15:18:04 -08:00 committed by Daniel Veillard
parent 0090675905
commit 0bcd05c5cd
9 changed files with 154 additions and 83 deletions

View File

@ -55,6 +55,10 @@
#include <libxml/globals.h> #include <libxml/globals.h>
#include <libxml/chvalid.h> #include <libxml/chvalid.h>
#define CUR(ctxt) ctxt->input->cur
#define END(ctxt) ctxt->input->end
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
#include "buf.h" #include "buf.h"
#include "enc.h" #include "enc.h"
@ -422,103 +426,105 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
(ctxt->input == NULL)) (ctxt->input == NULL))
return; return;
if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { if (!(VALID_CTXT(ctxt))) {
if ((*ctxt->input->cur == 0) && xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);
(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && ctxt->errNo = XML_ERR_INTERNAL_ERROR;
(ctxt->instate != XML_PARSER_COMMENT)) { xmlStopParser(ctxt);
/* return;
* If we are at the end of the current entity and }
* the context allows it, we pop consumed entities
* automatically. if ((*ctxt->input->cur == 0) &&
* the auto closing should be blocked in other cases (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
*/ if ((ctxt->instate != XML_PARSER_COMMENT))
xmlPopInput(ctxt); xmlPopInput(ctxt);
} else { return;
const unsigned char *cur; }
unsigned char c;
/* if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
* 2.11 End-of-Line Handling const unsigned char *cur;
* the literal two-character sequence "#xD#xA" or a standalone unsigned char c;
* literal #xD, an XML processor must pass to the application
* the single character #xA.
*/
if (*(ctxt->input->cur) == '\n') {
ctxt->input->line++; ctxt->input->col = 1;
} else
ctxt->input->col++;
/* /*
* We are supposed to handle UTF8, check it's valid * 2.11 End-of-Line Handling
* From rfc2044: encoding of the Unicode values on UTF-8: * the literal two-character sequence "#xD#xA" or a standalone
* * literal #xD, an XML processor must pass to the application
* UCS-4 range (hex.) UTF-8 octet sequence (binary) * the single character #xA.
* 0000 0000-0000 007F 0xxxxxxx */
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx if (*(ctxt->input->cur) == '\n') {
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx ctxt->input->line++; ctxt->input->col = 1;
* } else
* Check for the 0x110000 limit too ctxt->input->col++;
*/
cur = ctxt->input->cur;
c = *cur; /*
if (c & 0x80) { * We are supposed to handle UTF8, check it's valid
if (c == 0xC0) * From rfc2044: encoding of the Unicode values on UTF-8:
goto encoding_error; *
if (cur[1] == 0) { * UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 0000 0000-0000 007F 0xxxxxxx
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
*
* Check for the 0x110000 limit too
*/
cur = ctxt->input->cur;
c = *cur;
if (c & 0x80) {
if (c == 0xC0)
goto encoding_error;
if (cur[1] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
cur = ctxt->input->cur;
}
if ((cur[1] & 0xc0) != 0x80)
goto encoding_error;
if ((c & 0xe0) == 0xe0) {
unsigned int val;
if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
cur = ctxt->input->cur; cur = ctxt->input->cur;
} }
if ((cur[1] & 0xc0) != 0x80) if ((cur[2] & 0xc0) != 0x80)
goto encoding_error; goto encoding_error;
if ((c & 0xe0) == 0xe0) { if ((c & 0xf0) == 0xf0) {
unsigned int val; if (cur[3] == 0) {
if (cur[2] == 0) {
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
cur = ctxt->input->cur; cur = ctxt->input->cur;
} }
if ((cur[2] & 0xc0) != 0x80) if (((c & 0xf8) != 0xf0) ||
((cur[3] & 0xc0) != 0x80))
goto encoding_error; goto encoding_error;
if ((c & 0xf0) == 0xf0) { /* 4-byte code */
if (cur[3] == 0) { ctxt->input->cur += 4;
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); val = (cur[0] & 0x7) << 18;
cur = ctxt->input->cur; val |= (cur[1] & 0x3f) << 12;
} val |= (cur[2] & 0x3f) << 6;
if (((c & 0xf8) != 0xf0) || val |= cur[3] & 0x3f;
((cur[3] & 0xc0) != 0x80)) } else {
goto encoding_error; /* 3-byte code */
/* 4-byte code */ ctxt->input->cur += 3;
ctxt->input->cur += 4; val = (cur[0] & 0xf) << 12;
val = (cur[0] & 0x7) << 18; val |= (cur[1] & 0x3f) << 6;
val |= (cur[1] & 0x3f) << 12; val |= cur[2] & 0x3f;
val |= (cur[2] & 0x3f) << 6; }
val |= cur[3] & 0x3f; if (((val > 0xd7ff) && (val < 0xe000)) ||
} else { ((val > 0xfffd) && (val < 0x10000)) ||
/* 3-byte code */ (val >= 0x110000)) {
ctxt->input->cur += 3; xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
val = (cur[0] & 0xf) << 12; "Char 0x%X out of allowed range\n",
val |= (cur[1] & 0x3f) << 6; val);
val |= cur[2] & 0x3f; }
}
if (((val > 0xd7ff) && (val < 0xe000)) ||
((val > 0xfffd) && (val < 0x10000)) ||
(val >= 0x110000)) {
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
"Char 0x%X out of allowed range\n",
val);
}
} else
/* 2-byte code */
ctxt->input->cur += 2;
} else } else
/* 1-byte code */ /* 2-byte code */
ctxt->input->cur++; ctxt->input->cur += 2;
} else
/* 1-byte code */
ctxt->input->cur++;
ctxt->nbChars++; ctxt->nbChars++;
if (*ctxt->input->cur == 0) if (*ctxt->input->cur == 0)
xmlParserInputGrow(ctxt->input, INPUT_CHUNK); xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
} else { } else {
/* /*
* Assume it's a fixed length encoding (1) with * Assume it's a fixed length encoding (1) with

2
result/HTML/758606.html Normal file
View File

@ -0,0 +1,2 @@
<!DOCTYPE >

View File

@ -0,0 +1,16 @@
./test/HTML/758606.html:1: HTML parser error : Comment not terminated
<!--
<!-- <!doctype
^
./test/HTML/758606.html:1: HTML parser error : Invalid char in CDATA 0xC
<!-- <!doctype
^
./test/HTML/758606.html:1: HTML parser error : Misplaced DOCTYPE declaration
<!-- <!doctype
^
./test/HTML/758606.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
^
./test/HTML/758606.html:2: HTML parser error : DOCTYPE improperly terminated
^

View File

@ -0,0 +1,10 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.error: Comment not terminated
<!--
SAX.error: Invalid char in CDATA 0xC
SAX.error: Misplaced DOCTYPE declaration
SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
SAX.error: DOCTYPE improperly terminated
SAX.internalSubset((null), , )
SAX.endDocument()

View File

@ -0,0 +1,2 @@
<!DOCTYPE >
<html><body><p>&#145;</p></body></html>

View File

@ -0,0 +1,16 @@
./test/HTML/758606_2.html:1: HTML parser error : Comment not terminated
<!--
<!-- <!dOctYPE
^
./test/HTML/758606_2.html:1: HTML parser error : Invalid char in CDATA 0xC
<!-- <!dOctYPE
^
./test/HTML/758606_2.html:1: HTML parser error : Misplaced DOCTYPE declaration
‘<!dOctYPE
^
./test/HTML/758606_2.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
^
./test/HTML/758606_2.html:2: HTML parser error : DOCTYPE improperly terminated
^

View File

@ -0,0 +1,17 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.error: Comment not terminated
<!--
SAX.error: Invalid char in CDATA 0xC
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(&#145;, 2)
SAX.error: Misplaced DOCTYPE declaration
SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
SAX.error: DOCTYPE improperly terminated
SAX.internalSubset((null), , )
SAX.endElement(p)
SAX.endElement(body)
SAX.endElement(html)
SAX.endDocument()

1
test/HTML/758606.html Normal file
View File

@ -0,0 +1 @@
<!-- <!doctype

1
test/HTML/758606_2.html Normal file
View File

@ -0,0 +1 @@
<!-- <!dOctYPE