mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
Fixed the HTTP<->parser interaction, which should fix 2 long-standing
* include/libxml/nanohttp.h include/libxml/parserInternals.h include/libxml/xmlIO.h nanohttp.c parserInternals.c xmlIO.c: Fixed the HTTP<->parser interaction, which should fix 2 long-standing bugs, #104790 and #124054. This also fixes the fact that HTTP error codes (>= 400) should not generate data: we usually don't want to parse the HTML error information instead of the resource looked at. Daniel
This commit is contained in:
parent
fc60fc2009
commit
a840b69261
10
ChangeLog
10
ChangeLog
@ -1,3 +1,13 @@
|
||||
Sun Oct 19 15:31:43 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
||||
|
||||
* include/libxml/nanohttp.h include/libxml/parserInternals.h
|
||||
include/libxml/xmlIO.h nanohttp.c parserInternals.c xmlIO.c:
|
||||
Fixed the HTTP<->parser interaction, which should fix 2 long
|
||||
standing bugs #104790 and #124054; this also fixes the fact that
|
||||
HTTP error codes (>= 400) should not generate data; we usually
|
||||
don't want to parse the HTML error information instead of the
|
||||
resource looked at.
|
||||
|
||||
Sun Oct 19 19:20:48 HKT 2003 William Brack <wbrack@mmm.com.hk>
|
||||
|
||||
* doc/Makefile.am: enhanced the installation of tutorial files
|
||||
|
@ -53,9 +53,11 @@ XMLPUBFUN int XMLCALL
|
||||
XMLPUBFUN const char * XMLCALL
|
||||
xmlNanoHTTPAuthHeader (void *ctx);
|
||||
XMLPUBFUN const char * XMLCALL
|
||||
xmlNanoHTTPRedir (void * ctx);
|
||||
xmlNanoHTTPRedir (void *ctx);
|
||||
XMLPUBFUN const char * XMLCALL
|
||||
xmlNanoHTTPEncoding (void * ctx);
|
||||
xmlNanoHTTPEncoding (void *ctx);
|
||||
XMLPUBFUN const char * XMLCALL
|
||||
xmlNanoHTTPMimeType (void *ctx);
|
||||
XMLPUBFUN int XMLCALL
|
||||
xmlNanoHTTPRead (void *ctx,
|
||||
void *dest,
|
||||
|
@ -271,8 +271,21 @@ XMLPUBFUN int XMLCALL
|
||||
xmlCharEncoding enc);
|
||||
XMLPUBFUN int XMLCALL
|
||||
xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
|
||||
xmlCharEncodingHandlerPtr handler);
|
||||
xmlCharEncodingHandlerPtr handler);
|
||||
XMLPUBFUN int XMLCALL
|
||||
xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt,
|
||||
xmlParserInputPtr input,
|
||||
xmlCharEncodingHandlerPtr handler);
|
||||
|
||||
#ifdef IN_LIBXML
|
||||
/* internal error reporting */
|
||||
XMLPUBFUN void XMLCALL
|
||||
__xmlErrEncoding (xmlParserCtxtPtr ctxt,
|
||||
xmlParserErrors error,
|
||||
const char *msg,
|
||||
const xmlChar * str1,
|
||||
const xmlChar * str2);
|
||||
#endif
|
||||
/**
|
||||
* Entities
|
||||
*/
|
||||
|
@ -260,6 +260,9 @@ XMLPUBFUN void * XMLCALL
|
||||
XMLPUBFUN void XMLCALL
|
||||
xmlRegisterHTTPPostCallbacks (void );
|
||||
#endif
|
||||
XMLPUBFUN xmlParserInputPtr XMLCALL
|
||||
xmlCheckHTTPInput (xmlParserCtxtPtr ctxt,
|
||||
xmlParserInputPtr ret);
|
||||
|
||||
/*
|
||||
* A predefined entity loader disabling network accesses
|
||||
|
60
nanohttp.c
60
nanohttp.c
@ -150,6 +150,7 @@ typedef struct xmlNanoHTTPCtxt {
|
||||
char *location; /* the new URL in case of redirect */
|
||||
char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */
|
||||
char *encoding; /* encoding extracted from the contentType */
|
||||
char *mimeType; /* Mime-Type extracted from the contentType */
|
||||
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
|
||||
|
||||
static int initialized = 0;
|
||||
@ -530,6 +531,7 @@ xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
|
||||
if (ctxt->in != NULL) xmlFree(ctxt->in);
|
||||
if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
|
||||
if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
|
||||
if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
|
||||
if (ctxt->location != NULL) xmlFree(ctxt->location);
|
||||
if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
|
||||
ctxt->state = XML_NANO_HTTP_NONE;
|
||||
@ -737,7 +739,7 @@ xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
|
||||
* Try to extract useful informations from the server answer.
|
||||
* We currently parse and process:
|
||||
* - The HTTP revision/ return code
|
||||
* - The Content-Type
|
||||
* - The Content-Type, Mime-Type and charset used
|
||||
* - The Location for redirect processing.
|
||||
*
|
||||
* Returns -1 in case of failure, the file descriptor number otherwise
|
||||
@ -781,16 +783,56 @@ xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
|
||||
if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
|
||||
ctxt->returnValue = ret;
|
||||
} else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
|
||||
const xmlChar *charset, *last, *mime;
|
||||
cur += 13;
|
||||
while ((*cur == ' ') || (*cur == '\t')) cur++;
|
||||
if (ctxt->contentType != NULL)
|
||||
xmlFree(ctxt->contentType);
|
||||
ctxt->contentType = xmlMemStrdup(cur);
|
||||
mime = (const xmlChar *) cur;
|
||||
last = mime;
|
||||
while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
|
||||
(*last != ';') && (*last != ','))
|
||||
last++;
|
||||
if (ctxt->mimeType != NULL)
|
||||
xmlFree(ctxt->mimeType);
|
||||
ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
|
||||
charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
|
||||
if (charset != NULL) {
|
||||
charset += 8;
|
||||
last = charset;
|
||||
while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
|
||||
(*last != ';') && (*last != ','))
|
||||
last++;
|
||||
if (ctxt->encoding != NULL)
|
||||
xmlFree(ctxt->encoding);
|
||||
ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
|
||||
}
|
||||
} else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
|
||||
const xmlChar *charset, *last, *mime;
|
||||
cur += 12;
|
||||
if (ctxt->contentType != NULL) return;
|
||||
while ((*cur == ' ') || (*cur == '\t')) cur++;
|
||||
ctxt->contentType = xmlMemStrdup(cur);
|
||||
mime = (const xmlChar *) cur;
|
||||
last = mime;
|
||||
while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
|
||||
(*last != ';') && (*last != ','))
|
||||
last++;
|
||||
if (ctxt->mimeType != NULL)
|
||||
xmlFree(ctxt->mimeType);
|
||||
ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
|
||||
charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
|
||||
if (charset != NULL) {
|
||||
charset += 8;
|
||||
last = charset;
|
||||
while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
|
||||
(*last != ';') && (*last != ','))
|
||||
last++;
|
||||
if (ctxt->encoding != NULL)
|
||||
xmlFree(ctxt->encoding);
|
||||
ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
|
||||
}
|
||||
} else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
|
||||
cur += 9;
|
||||
while ((*cur == ' ') || (*cur == '\t')) cur++;
|
||||
@ -1227,6 +1269,7 @@ retry:
|
||||
ctxt = xmlNanoHTTPNewCtxt(URL);
|
||||
else {
|
||||
ctxt = xmlNanoHTTPNewCtxt(redirURL);
|
||||
ctxt->location = xmlMemStrdup(redirURL);
|
||||
}
|
||||
|
||||
if ( ctxt == NULL ) {
|
||||
@ -1608,6 +1651,21 @@ xmlNanoHTTPEncoding( void * ctx ) {
|
||||
return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlNanoHTTPMimeType:
|
||||
* @ctx: the HTTP context
|
||||
*
|
||||
* Provides the specified Mime-Type if specified in the HTTP headers.
|
||||
*
|
||||
* Return the specified Mime-Type or NULL if not available
|
||||
*/
|
||||
const char *
|
||||
xmlNanoHTTPMimeType( void * ctx ) {
|
||||
xmlNanoHTTPCtxtPtr ctxt = ctx;
|
||||
|
||||
return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlNanoHTTPFetchContent:
|
||||
* @ctx: the HTTP context
|
||||
|
@ -122,7 +122,7 @@ xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlErrEncoding:
|
||||
* __xmlErrEncoding:
|
||||
* @ctxt: an XML parser context
|
||||
* @error: the error number
|
||||
* @msg: the error message
|
||||
@ -131,9 +131,9 @@ xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
|
||||
*
|
||||
* Handle an encoding error
|
||||
*/
|
||||
static void
|
||||
xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
||||
const char *msg, const xmlChar * str1, const xmlChar * str2)
|
||||
void
|
||||
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors error,
|
||||
const char *msg, const xmlChar * str1, const xmlChar * str2)
|
||||
{
|
||||
if (ctxt != NULL)
|
||||
ctxt->errNo = error;
|
||||
@ -558,7 +558,7 @@ encoding_error:
|
||||
* to ISO-Latin-1 (if you don't like this policy, just declare the
|
||||
* encoding !)
|
||||
*/
|
||||
xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
||||
"Input is not proper UTF-8, indicate encoding !\n",
|
||||
NULL, NULL);
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
|
||||
@ -704,7 +704,7 @@ encoding_error:
|
||||
* to ISO-Latin-1 (if you don't like this policy, just declare the
|
||||
* encoding !)
|
||||
*/
|
||||
xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
||||
"Input is not proper UTF-8, indicate encoding !\n",
|
||||
NULL, NULL);
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
|
||||
@ -804,7 +804,7 @@ encoding_error:
|
||||
* to ISO-Latin-1 (if you don't like this policy, just declare the
|
||||
* encoding !)
|
||||
*/
|
||||
xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
|
||||
"Input is not proper UTF-8, indicate encoding !\n",
|
||||
NULL, NULL);
|
||||
if ((ctxt != NULL) && (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
|
||||
@ -901,7 +901,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
|
||||
"encoding unknown\n", NULL, NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
@ -951,7 +951,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
*/
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
|
||||
"encoding unknown\n", NULL, NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
@ -968,32 +968,32 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
case XML_CHAR_ENCODING_UTF16BE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4LE:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "USC4 little endian", NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4BE:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "USC4 big endian", NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EBCDIC:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "EBCDIC", NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_2143:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "UCS4 2143", NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_3412:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "UCS4 3412", NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS2:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "UCS2", NULL);
|
||||
break;
|
||||
@ -1020,17 +1020,17 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
ctxt->charset = enc;
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_2022_JP:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "ISO-2022-JP", NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_SHIFT_JIS:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "Shift_JIS", NULL);
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EUC_JP:
|
||||
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"encoding not supported %s\n",
|
||||
BAD_CAST "EUC-JP", NULL);
|
||||
break;
|
||||
@ -1042,6 +1042,175 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
return(xmlSwitchToEncoding(ctxt, handler));
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlSwitchInputEncoding:
|
||||
* @ctxt: the parser context
|
||||
* @input: the input stream
|
||||
* @handler: the encoding handler
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
int
|
||||
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
||||
xmlCharEncodingHandlerPtr handler)
|
||||
{
|
||||
int nbchars;
|
||||
|
||||
if (handler == NULL)
|
||||
return (-1);
|
||||
if (input == NULL)
|
||||
return (-1);
|
||||
if (input->buf != NULL) {
|
||||
if (input->buf->encoder != NULL) {
|
||||
/*
|
||||
* Check in case the auto encoding detetection triggered
|
||||
* in already.
|
||||
*/
|
||||
if (input->buf->encoder == handler)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* "UTF-16" can be used for both LE and BE
|
||||
if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
|
||||
BAD_CAST "UTF-16", 6)) &&
|
||||
(!xmlStrncmp(BAD_CAST handler->name,
|
||||
BAD_CAST "UTF-16", 6))) {
|
||||
return(0);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
* Note: this is a bit dangerous, but that's what it
|
||||
* takes to use nearly compatible signature for different
|
||||
* encodings.
|
||||
*/
|
||||
xmlCharEncCloseFunc(input->buf->encoder);
|
||||
input->buf->encoder = handler;
|
||||
return (0);
|
||||
}
|
||||
input->buf->encoder = handler;
|
||||
|
||||
/*
|
||||
* Is there already some content down the pipe to convert ?
|
||||
*/
|
||||
if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-16
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16LE")) &&
|
||||
(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
|
||||
input->cur += 2;
|
||||
}
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||
(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
|
||||
input->cur += 2;
|
||||
}
|
||||
/*
|
||||
* Errata on XML-1.0 June 20 2001
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-8
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-8")) &&
|
||||
(input->cur[0] == 0xEF) &&
|
||||
(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
|
||||
input->cur += 3;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shrink the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = input->cur - input->base;
|
||||
xmlBufferShrink(input->buf->buffer, processed);
|
||||
input->buf->raw = input->buf->buffer;
|
||||
input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
if (ctxt->html) {
|
||||
/*
|
||||
* convert as much as possible of the buffer
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(input->buf->encoder,
|
||||
input->buf->buffer,
|
||||
input->buf->raw);
|
||||
} else {
|
||||
/*
|
||||
* convert just enough to get
|
||||
* '<?xml version="1.0" encoding="xxx"?>'
|
||||
* parsed with the autodetected encoding
|
||||
* into the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncFirstLine(input->buf->encoder,
|
||||
input->buf->buffer,
|
||||
input->buf->raw);
|
||||
}
|
||||
if (nbchars < 0) {
|
||||
xmlErrInternal(ctxt,
|
||||
"switching encoding: encoder error\n",
|
||||
NULL);
|
||||
return (-1);
|
||||
}
|
||||
input->base = input->cur = input->buf->buffer->content;
|
||||
input->end = &input->base[input->buf->buffer->use];
|
||||
|
||||
}
|
||||
return (0);
|
||||
} else {
|
||||
if ((input->length == 0) || (input->buf == NULL)) {
|
||||
/*
|
||||
* When parsing a static memory array one must know the
|
||||
* size to be able to convert the buffer.
|
||||
*/
|
||||
xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
|
||||
return (-1);
|
||||
} else {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Shrink the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = input->cur - input->base;
|
||||
|
||||
input->buf->raw = xmlBufferCreate();
|
||||
xmlBufferAdd(input->buf->raw, input->cur,
|
||||
input->length - processed);
|
||||
input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
/*
|
||||
* convert as much as possible of the raw input
|
||||
* to the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(input->buf->encoder,
|
||||
input->buf->buffer,
|
||||
input->buf->raw);
|
||||
if (nbchars < 0) {
|
||||
xmlErrInternal(ctxt,
|
||||
"switching encoding: encoder error\n",
|
||||
NULL);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conversion succeeded, get rid of the old buffer
|
||||
*/
|
||||
if ((input->free != NULL) && (input->base != NULL))
|
||||
input->free((xmlChar *) input->base);
|
||||
input->base = input->cur = input->buf->buffer->content;
|
||||
input->end = &input->base[input->buf->buffer->use];
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlSwitchToEncoding:
|
||||
* @ctxt: the parser context
|
||||
@ -1055,165 +1224,9 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
int
|
||||
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
||||
{
|
||||
int nbchars;
|
||||
|
||||
if (handler != NULL) {
|
||||
if (ctxt->input != NULL) {
|
||||
if (ctxt->input->buf != NULL) {
|
||||
if (ctxt->input->buf->encoder != NULL) {
|
||||
/*
|
||||
* Check in case the auto encoding detetection triggered
|
||||
* in already.
|
||||
*/
|
||||
if (ctxt->input->buf->encoder == handler)
|
||||
return(0);
|
||||
|
||||
/*
|
||||
* "UTF-16" can be used for both LE and BE
|
||||
if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
|
||||
BAD_CAST "UTF-16", 6)) &&
|
||||
(!xmlStrncmp(BAD_CAST handler->name,
|
||||
BAD_CAST "UTF-16", 6))) {
|
||||
return(0);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
* Note: this is a bit dangerous, but that's what it
|
||||
* takes to use nearly compatible signature for different
|
||||
* encodings.
|
||||
*/
|
||||
xmlCharEncCloseFunc(ctxt->input->buf->encoder);
|
||||
ctxt->input->buf->encoder = handler;
|
||||
return(0);
|
||||
}
|
||||
ctxt->input->buf->encoder = handler;
|
||||
|
||||
/*
|
||||
* Is there already some content down the pipe to convert ?
|
||||
*/
|
||||
if ((ctxt->input->buf->buffer != NULL) &&
|
||||
(ctxt->input->buf->buffer->use > 0)) {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-16
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16LE")) &&
|
||||
(ctxt->input->cur[0] == 0xFF) &&
|
||||
(ctxt->input->cur[1] == 0xFE)) {
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||
(ctxt->input->cur[0] == 0xFE) &&
|
||||
(ctxt->input->cur[1] == 0xFF)) {
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
/*
|
||||
* Errata on XML-1.0 June 20 2001
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-8
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-8")) &&
|
||||
(ctxt->input->cur[0] == 0xEF) &&
|
||||
(ctxt->input->cur[1] == 0xBB) &&
|
||||
(ctxt->input->cur[2] == 0xBF)) {
|
||||
ctxt->input->cur += 3;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shrink the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
xmlBufferShrink(ctxt->input->buf->buffer, processed);
|
||||
ctxt->input->buf->raw = ctxt->input->buf->buffer;
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
if (ctxt->html) {
|
||||
/*
|
||||
* convert as much as possible of the buffer
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
} else {
|
||||
/*
|
||||
* convert just enough to get
|
||||
* '<?xml version="1.0" encoding="xxx"?>'
|
||||
* parsed with the autodetected encoding
|
||||
* into the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
}
|
||||
if (nbchars < 0) {
|
||||
xmlErrInternal(ctxt,
|
||||
"xmlSwitchToEncoding: encoder error\n",
|
||||
NULL);
|
||||
return(-1);
|
||||
}
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
ctxt->input->end =
|
||||
&ctxt->input->base[ctxt->input->buf->buffer->use];
|
||||
|
||||
}
|
||||
return(0);
|
||||
} else {
|
||||
if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
|
||||
/*
|
||||
* When parsing a static memory array one must know the
|
||||
* size to be able to convert the buffer.
|
||||
*/
|
||||
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
|
||||
NULL);
|
||||
return(-1);
|
||||
} else {
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Shrink the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
|
||||
ctxt->input->buf->raw = xmlBufferCreate();
|
||||
xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
|
||||
ctxt->input->length - processed);
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
/*
|
||||
* convert as much as possible of the raw input
|
||||
* to the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
if (nbchars < 0) {
|
||||
xmlErrInternal(ctxt,
|
||||
"xmlSwitchToEncoding: encoder error\n",
|
||||
NULL);
|
||||
return(-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conversion succeeded, get rid of the old buffer
|
||||
*/
|
||||
if ((ctxt->input->free != NULL) &&
|
||||
(ctxt->input->base != NULL))
|
||||
ctxt->input->free((xmlChar *) ctxt->input->base);
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
ctxt->input->end =
|
||||
&ctxt->input->base[ctxt->input->buf->buffer->use];
|
||||
}
|
||||
}
|
||||
xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
|
||||
} else {
|
||||
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
|
||||
NULL);
|
||||
@ -1226,7 +1239,6 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
||||
} else
|
||||
return(-1);
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
@ -1417,7 +1429,7 @@ xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
|
||||
* @ctxt: an XML parser context
|
||||
* @filename: the filename to use as entity
|
||||
*
|
||||
* Create a new input stream based on a file.
|
||||
* Create a new input stream based on a file or an URL.
|
||||
*
|
||||
* Returns the new input stream or NULL in case of error
|
||||
*/
|
||||
@ -1436,20 +1448,25 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
|
||||
if (buf == NULL)
|
||||
return(NULL);
|
||||
|
||||
URI = xmlStrdup((xmlChar *) filename);
|
||||
directory = xmlParserGetDirectory((const char *) URI);
|
||||
|
||||
inputStream = xmlNewInputStream(ctxt);
|
||||
if (inputStream == NULL) {
|
||||
if (directory != NULL) xmlFree((char *) directory);
|
||||
if (URI != NULL) xmlFree((char *) URI);
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
inputStream->buf = buf;
|
||||
inputStream = xmlCheckHTTPInput(ctxt, inputStream);
|
||||
if (inputStream == NULL)
|
||||
return(NULL);
|
||||
|
||||
if (inputStream->filename == NULL)
|
||||
URI = xmlStrdup((xmlChar *) filename);
|
||||
else
|
||||
URI = xmlStrdup((xmlChar *) inputStream->filename);
|
||||
directory = xmlParserGetDirectory((const char *) URI);
|
||||
inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
|
||||
if (URI != NULL) xmlFree((char *) URI);
|
||||
inputStream->directory = directory;
|
||||
inputStream->buf = buf;
|
||||
|
||||
inputStream->base = inputStream->buf->buffer->content;
|
||||
inputStream->cur = inputStream->buf->buffer->content;
|
||||
|
201
xmlIO.c
201
xmlIO.c
@ -2961,6 +2961,80 @@ xmlParserGetDirectory(const char *filename) {
|
||||
* *
|
||||
****************************************************************/
|
||||
|
||||
/**
|
||||
* xmlCheckHTTPInput:
|
||||
* @ctxt: an XML parser context
|
||||
* @ret: an XML parser input
|
||||
*
|
||||
* Check an input in case it was created from an HTTP stream, in that
|
||||
* case it will handle encoding and update of the base URL in case of
|
||||
* redirection. It also checks for HTTP errors in which case the input
|
||||
* is cleanly freed up and an appropriate error is raised in context
|
||||
*
|
||||
* Returns the input or NULL in case of HTTP error.
|
||||
*/
|
||||
xmlParserInputPtr
|
||||
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
|
||||
#ifdef LIBXML_HTTP_ENABLED
|
||||
if ((ret != NULL) && (ret->buf != NULL) &&
|
||||
(ret->buf->readcallback == xmlIOHTTPRead) &&
|
||||
(ret->buf->context != NULL)) {
|
||||
const char *encoding;
|
||||
const char *redir;
|
||||
const char *mime;
|
||||
int code;
|
||||
|
||||
code = xmlNanoHTTPReturnCode(ret->buf->context);
|
||||
if (code >= 400) {
|
||||
/* fatal error */
|
||||
if (ret->filename != NULL)
|
||||
xmlLoaderErr(ctxt, "failed to load HTTP resource \"%s\"\n",
|
||||
(const char *) ret->filename);
|
||||
else
|
||||
xmlLoaderErr(ctxt, "failed to load HTTP resource\n", NULL);
|
||||
xmlFreeInputStream(ret);
|
||||
ret = NULL;
|
||||
} else {
|
||||
|
||||
mime = xmlNanoHTTPMimeType(ret->buf->context);
|
||||
if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
|
||||
(xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
|
||||
encoding = xmlNanoHTTPEncoding(ret->buf->context);
|
||||
if (encoding != NULL) {
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
|
||||
handler = xmlFindCharEncodingHandler(encoding);
|
||||
if (handler != NULL) {
|
||||
xmlSwitchInputEncoding(ctxt, ret, handler);
|
||||
} else {
|
||||
__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
|
||||
"Unknown encoding %s",
|
||||
BAD_CAST encoding, NULL);
|
||||
}
|
||||
if (ret->encoding == NULL)
|
||||
ret->encoding = xmlStrdup(BAD_CAST encoding);
|
||||
}
|
||||
#if 0
|
||||
} else if (xmlStrstr(BAD_CAST mime, BAD_CAST "html")) {
|
||||
#endif
|
||||
}
|
||||
redir = xmlNanoHTTPRedir(ret->buf->context);
|
||||
if (redir != NULL) {
|
||||
if (ret->filename != NULL)
|
||||
xmlFree((xmlChar *) ret->filename);
|
||||
if (ret->directory != NULL) {
|
||||
xmlFree((xmlChar *) ret->directory);
|
||||
ret->directory = NULL;
|
||||
}
|
||||
ret->filename =
|
||||
(char *) xmlStrdup((const xmlChar *) redir);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return(ret);
|
||||
}
|
||||
|
||||
static int xmlSysIDExists(const char *URL) {
|
||||
#ifdef HAVE_STAT
|
||||
int ret;
|
||||
@ -3001,19 +3075,20 @@ static int xmlSysIDExists(const char *URL) {
|
||||
*
|
||||
* Returns a new allocated xmlParserInputPtr, or NULL.
|
||||
*/
|
||||
static
|
||||
xmlParserInputPtr
|
||||
static xmlParserInputPtr
|
||||
xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
|
||||
xmlParserCtxtPtr ctxt) {
|
||||
xmlParserCtxtPtr ctxt)
|
||||
{
|
||||
xmlParserInputPtr ret = NULL;
|
||||
xmlChar *resource = NULL;
|
||||
|
||||
#ifdef LIBXML_CATALOG_ENABLED
|
||||
xmlCatalogAllow pref;
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_EXTERNAL_ENTITIES
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
|
||||
"xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
|
||||
#endif
|
||||
#ifdef LIBXML_CATALOG_ENABLED
|
||||
/*
|
||||
@ -3023,87 +3098,71 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
|
||||
pref = xmlCatalogGetDefaults();
|
||||
|
||||
if ((pref != XML_CATA_ALLOW_NONE) && (!xmlSysIDExists(URL))) {
|
||||
/*
|
||||
* Do a local lookup
|
||||
*/
|
||||
if ((ctxt->catalogs != NULL) &&
|
||||
((pref == XML_CATA_ALLOW_ALL) ||
|
||||
(pref == XML_CATA_ALLOW_DOCUMENT))) {
|
||||
resource = xmlCatalogLocalResolve(ctxt->catalogs,
|
||||
(const xmlChar *)ID,
|
||||
(const xmlChar *)URL);
|
||||
/*
|
||||
* Do a local lookup
|
||||
*/
|
||||
if ((ctxt->catalogs != NULL) &&
|
||||
((pref == XML_CATA_ALLOW_ALL) ||
|
||||
(pref == XML_CATA_ALLOW_DOCUMENT))) {
|
||||
resource = xmlCatalogLocalResolve(ctxt->catalogs,
|
||||
(const xmlChar *) ID,
|
||||
(const xmlChar *) URL);
|
||||
}
|
||||
/*
|
||||
* Try a global lookup
|
||||
*/
|
||||
if ((resource == NULL) &&
|
||||
((pref == XML_CATA_ALLOW_ALL) ||
|
||||
(pref == XML_CATA_ALLOW_GLOBAL))) {
|
||||
resource = xmlCatalogResolve((const xmlChar *)ID,
|
||||
(const xmlChar *)URL);
|
||||
}
|
||||
if ((resource == NULL) && (URL != NULL))
|
||||
resource = xmlStrdup((const xmlChar *) URL);
|
||||
/*
|
||||
* Try a global lookup
|
||||
*/
|
||||
if ((resource == NULL) &&
|
||||
((pref == XML_CATA_ALLOW_ALL) ||
|
||||
(pref == XML_CATA_ALLOW_GLOBAL))) {
|
||||
resource = xmlCatalogResolve((const xmlChar *) ID,
|
||||
(const xmlChar *) URL);
|
||||
}
|
||||
if ((resource == NULL) && (URL != NULL))
|
||||
resource = xmlStrdup((const xmlChar *) URL);
|
||||
|
||||
/*
|
||||
* TODO: do an URI lookup on the reference
|
||||
*/
|
||||
if ((resource != NULL) && (!xmlSysIDExists((const char *)resource))) {
|
||||
xmlChar *tmp = NULL;
|
||||
/*
|
||||
* TODO: do an URI lookup on the reference
|
||||
*/
|
||||
if ((resource != NULL)
|
||||
&& (!xmlSysIDExists((const char *) resource))) {
|
||||
xmlChar *tmp = NULL;
|
||||
|
||||
if ((ctxt->catalogs != NULL) &&
|
||||
((pref == XML_CATA_ALLOW_ALL) ||
|
||||
(pref == XML_CATA_ALLOW_DOCUMENT))) {
|
||||
tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
|
||||
}
|
||||
if ((tmp == NULL) &&
|
||||
((pref == XML_CATA_ALLOW_ALL) ||
|
||||
(pref == XML_CATA_ALLOW_GLOBAL))) {
|
||||
tmp = xmlCatalogResolveURI(resource);
|
||||
}
|
||||
if ((ctxt->catalogs != NULL) &&
|
||||
((pref == XML_CATA_ALLOW_ALL) ||
|
||||
(pref == XML_CATA_ALLOW_DOCUMENT))) {
|
||||
tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
|
||||
}
|
||||
if ((tmp == NULL) &&
|
||||
((pref == XML_CATA_ALLOW_ALL) ||
|
||||
(pref == XML_CATA_ALLOW_GLOBAL))) {
|
||||
tmp = xmlCatalogResolveURI(resource);
|
||||
}
|
||||
|
||||
if (tmp != NULL) {
|
||||
xmlFree(resource);
|
||||
resource = tmp;
|
||||
}
|
||||
}
|
||||
if (tmp != NULL) {
|
||||
xmlFree(resource);
|
||||
resource = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (resource == NULL)
|
||||
resource = (xmlChar *) URL;
|
||||
resource = (xmlChar *) URL;
|
||||
|
||||
if (resource == NULL) {
|
||||
if (ID == NULL)
|
||||
ID = "NULL";
|
||||
xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", ID);
|
||||
return(NULL);
|
||||
if (ID == NULL)
|
||||
ID = "NULL";
|
||||
xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", ID);
|
||||
return (NULL);
|
||||
}
|
||||
ret = xmlNewInputFromFile(ctxt, (const char *)resource);
|
||||
ret = xmlNewInputFromFile(ctxt, (const char *) resource);
|
||||
if (ret == NULL) {
|
||||
xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
|
||||
(const char *) resource);
|
||||
return(NULL);
|
||||
}
|
||||
if ((ret->buf != NULL) && (ret->buf->readcallback == xmlIOHTTPRead)) {
|
||||
const char *encoding;
|
||||
const char *redir;
|
||||
|
||||
encoding = xmlNanoHTTPEncoding(ret->buf->context);
|
||||
redir = xmlNanoHTTPRedir(ret->buf->context);
|
||||
if (redir != NULL) {
|
||||
if (ret->filename != NULL)
|
||||
xmlFree((xmlChar *) ret->filename);
|
||||
if (ret->directory != NULL) {
|
||||
xmlFree((xmlChar *) ret->directory);
|
||||
ret->directory = NULL;
|
||||
}
|
||||
ret->filename = (char *) xmlStrdup((const xmlChar *)redir);
|
||||
}
|
||||
xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
|
||||
(const char *) resource);
|
||||
}
|
||||
if ((resource != NULL) && (resource != (xmlChar *) URL))
|
||||
xmlFree(resource);
|
||||
return(ret);
|
||||
xmlFree(resource);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =
|
||||
|
Loading…
x
Reference in New Issue
Block a user