Fixed the HTTP<->parser interraction, which should fix 2 long standing

* include/libxml/nanohttp.h include/libxml/parserInternals.h
  include/libxml/xmlIO.h nanohttp.c parserInternals.c xmlIO.c:
  Fixed the HTTP<->parser interraction, which should fix 2 long
  standing bugs #104790 and #124054 , this also fix the fact that
  HTTP error code (> 400) should not generate data, we usually
  don't want to parse the HTML error information instead of the
  resource looked at.
Daniel
This commit is contained in:
Daniel Veillard 2003-10-19 13:35:37 +00:00
parent fc60fc2009
commit a840b69261
7 changed files with 419 additions and 257 deletions

View File

@ -1,3 +1,13 @@
Sun Oct 19 15:31:43 CEST 2003 Daniel Veillard <daniel@veillard.com>
* include/libxml/nanohttp.h include/libxml/parserInternals.h
include/libxml/xmlIO.h nanohttp.c parserInternals.c xmlIO.c:
Fixed the HTTP<->parser interraction, which should fix 2 long
standing bugs #104790 and #124054 , this also fix the fact that
HTTP error code (> 400) should not generate data, we usually
don't want to parse the HTML error information instead of the
resource looked at.
Sun Oct 19 19:20:48 HKT 2003 William Brack <wbrack@mmm.com.hk>
* doc/Makefile.am: enhanced the installation of tutorial files

View File

@ -53,9 +53,11 @@ XMLPUBFUN int XMLCALL
XMLPUBFUN const char * XMLCALL
xmlNanoHTTPAuthHeader (void *ctx);
XMLPUBFUN const char * XMLCALL
xmlNanoHTTPRedir (void * ctx);
xmlNanoHTTPRedir (void *ctx);
XMLPUBFUN const char * XMLCALL
xmlNanoHTTPEncoding (void * ctx);
xmlNanoHTTPEncoding (void *ctx);
XMLPUBFUN const char * XMLCALL
xmlNanoHTTPMimeType (void *ctx);
XMLPUBFUN int XMLCALL
xmlNanoHTTPRead (void *ctx,
void *dest,

View File

@ -271,8 +271,21 @@ XMLPUBFUN int XMLCALL
xmlCharEncoding enc);
XMLPUBFUN int XMLCALL
xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
xmlCharEncodingHandlerPtr handler);
xmlCharEncodingHandlerPtr handler);
XMLPUBFUN int XMLCALL
xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt,
xmlParserInputPtr input,
xmlCharEncodingHandlerPtr handler);
#ifdef IN_LIBXML
/* internal error reporting */
XMLPUBFUN void XMLCALL
__xmlErrEncoding (xmlParserCtxtPtr ctxt,
xmlParserErrors error,
const char *msg,
const xmlChar * str1,
const xmlChar * str2);
#endif
/**
* Entities
*/

View File

@ -260,6 +260,9 @@ XMLPUBFUN void * XMLCALL
XMLPUBFUN void XMLCALL
xmlRegisterHTTPPostCallbacks (void );
#endif
XMLPUBFUN xmlParserInputPtr XMLCALL
xmlCheckHTTPInput (xmlParserCtxtPtr ctxt,
xmlParserInputPtr ret);
/*
* A predefined entity loader disabling network accesses

View File

@ -150,6 +150,7 @@ typedef struct xmlNanoHTTPCtxt {
char *location; /* the new URL in case of redirect */
char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */
char *encoding; /* encoding extracted from the contentType */
char *mimeType; /* Mime-Type extracted from the contentType */
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
static int initialized = 0;
@ -530,6 +531,7 @@ xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
if (ctxt->in != NULL) xmlFree(ctxt->in);
if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
if (ctxt->location != NULL) xmlFree(ctxt->location);
if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
ctxt->state = XML_NANO_HTTP_NONE;
@ -737,7 +739,7 @@ xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
* Try to extract useful informations from the server answer.
* We currently parse and process:
* - The HTTP revision/ return code
* - The Content-Type
* - The Content-Type, Mime-Type and charset used
* - The Location for redirect processing.
*
* Returns -1 in case of failure, the file descriptor number otherwise
@ -781,16 +783,56 @@ xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
ctxt->returnValue = ret;
} else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
const xmlChar *charset, *last, *mime;
cur += 13;
while ((*cur == ' ') || (*cur == '\t')) cur++;
if (ctxt->contentType != NULL)
xmlFree(ctxt->contentType);
ctxt->contentType = xmlMemStrdup(cur);
mime = (const xmlChar *) cur;
last = mime;
while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
(*last != ';') && (*last != ','))
last++;
if (ctxt->mimeType != NULL)
xmlFree(ctxt->mimeType);
ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
if (charset != NULL) {
charset += 8;
last = charset;
while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
(*last != ';') && (*last != ','))
last++;
if (ctxt->encoding != NULL)
xmlFree(ctxt->encoding);
ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
}
} else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
const xmlChar *charset, *last, *mime;
cur += 12;
if (ctxt->contentType != NULL) return;
while ((*cur == ' ') || (*cur == '\t')) cur++;
ctxt->contentType = xmlMemStrdup(cur);
mime = (const xmlChar *) cur;
last = mime;
while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
(*last != ';') && (*last != ','))
last++;
if (ctxt->mimeType != NULL)
xmlFree(ctxt->mimeType);
ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
if (charset != NULL) {
charset += 8;
last = charset;
while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
(*last != ';') && (*last != ','))
last++;
if (ctxt->encoding != NULL)
xmlFree(ctxt->encoding);
ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
}
} else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
cur += 9;
while ((*cur == ' ') || (*cur == '\t')) cur++;
@ -1227,6 +1269,7 @@ retry:
ctxt = xmlNanoHTTPNewCtxt(URL);
else {
ctxt = xmlNanoHTTPNewCtxt(redirURL);
ctxt->location = xmlMemStrdup(redirURL);
}
if ( ctxt == NULL ) {
@ -1608,6 +1651,21 @@ xmlNanoHTTPEncoding( void * ctx ) {
return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
}
/**
* xmlNanoHTTPMimeType:
* @ctx: the HTTP context
*
* Provides the specified Mime-Type if specified in the HTTP headers.
*
* Return the specified Mime-Type or NULL if not available
*/
const char *
xmlNanoHTTPMimeType( void * ctx ) {
xmlNanoHTTPCtxtPtr ctxt = ctx;
return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
}
/**
* xmlNanoHTTPFetchContent:
* @ctx: the HTTP context

View File

@ -122,7 +122,7 @@ xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
}
/**
* xmlErrEncoding:
* __xmlErrEncoding:
* @ctxt: an XML parser context
* @error: the error number
* @msg: the error message
@ -131,9 +131,9 @@ xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
*
* Handle an encoding error
*/
static void
xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar * str1, const xmlChar * str2)
void
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors error,
const char *msg, const xmlChar * str1, const xmlChar * str2)
{
if (ctxt != NULL)
ctxt->errNo = error;
@ -558,7 +558,7 @@ encoding_error:
* to ISO-Latin-1 (if you don't like this policy, just declare the
* encoding !)
*/
xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n",
NULL, NULL);
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
@ -704,7 +704,7 @@ encoding_error:
* to ISO-Latin-1 (if you don't like this policy, just declare the
* encoding !)
*/
xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n",
NULL, NULL);
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
@ -804,7 +804,7 @@ encoding_error:
* to ISO-Latin-1 (if you don't like this policy, just declare the
* encoding !)
*/
xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
"Input is not proper UTF-8, indicate encoding !\n",
NULL, NULL);
if ((ctxt != NULL) && (ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
@ -901,7 +901,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
switch (enc) {
case XML_CHAR_ENCODING_ERROR:
xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
"encoding unknown\n", NULL, NULL);
break;
case XML_CHAR_ENCODING_NONE:
@ -951,7 +951,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
*/
switch (enc) {
case XML_CHAR_ENCODING_ERROR:
xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
"encoding unknown\n", NULL, NULL);
break;
case XML_CHAR_ENCODING_NONE:
@ -968,32 +968,32 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
case XML_CHAR_ENCODING_UTF16BE:
break;
case XML_CHAR_ENCODING_UCS4LE:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "USC4 little endian", NULL);
break;
case XML_CHAR_ENCODING_UCS4BE:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "USC4 big endian", NULL);
break;
case XML_CHAR_ENCODING_EBCDIC:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "EBCDIC", NULL);
break;
case XML_CHAR_ENCODING_UCS4_2143:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "UCS4 2143", NULL);
break;
case XML_CHAR_ENCODING_UCS4_3412:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "UCS4 3412", NULL);
break;
case XML_CHAR_ENCODING_UCS2:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "UCS2", NULL);
break;
@ -1020,17 +1020,17 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
ctxt->charset = enc;
return(0);
case XML_CHAR_ENCODING_2022_JP:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "ISO-2022-JP", NULL);
break;
case XML_CHAR_ENCODING_SHIFT_JIS:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "Shift_JIS", NULL);
break;
case XML_CHAR_ENCODING_EUC_JP:
xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"encoding not supported %s\n",
BAD_CAST "EUC-JP", NULL);
break;
@ -1042,6 +1042,175 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
return(xmlSwitchToEncoding(ctxt, handler));
}
/**
* xmlSwitchInputEncoding:
* @ctxt: the parser context
* @input: the input stream
* @handler: the encoding handler
*
* change the input functions when discovering the character encoding
* of a given entity.
*
* Returns 0 in case of success, -1 otherwise
*/
int
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
xmlCharEncodingHandlerPtr handler)
{
int nbchars;
if (handler == NULL)
return (-1);
if (input == NULL)
return (-1);
if (input->buf != NULL) {
if (input->buf->encoder != NULL) {
/*
* Check in case the auto encoding detetection triggered
* in already.
*/
if (input->buf->encoder == handler)
return (0);
/*
* "UTF-16" can be used for both LE and BE
if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
BAD_CAST "UTF-16", 6)) &&
(!xmlStrncmp(BAD_CAST handler->name,
BAD_CAST "UTF-16", 6))) {
return(0);
}
*/
/*
* Note: this is a bit dangerous, but that's what it
* takes to use nearly compatible signature for different
* encodings.
*/
xmlCharEncCloseFunc(input->buf->encoder);
input->buf->encoder = handler;
return (0);
}
input->buf->encoder = handler;
/*
* Is there already some content down the pipe to convert ?
*/
if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
int processed;
/*
* Specific handling of the Byte Order Mark for
* UTF-16
*/
if ((handler->name != NULL) &&
(!strcmp(handler->name, "UTF-16LE")) &&
(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
input->cur += 2;
}
if ((handler->name != NULL) &&
(!strcmp(handler->name, "UTF-16BE")) &&
(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
input->cur += 2;
}
/*
* Errata on XML-1.0 June 20 2001
* Specific handling of the Byte Order Mark for
* UTF-8
*/
if ((handler->name != NULL) &&
(!strcmp(handler->name, "UTF-8")) &&
(input->cur[0] == 0xEF) &&
(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
input->cur += 3;
}
/*
* Shrink the current input buffer.
* Move it as the raw buffer and create a new input buffer
*/
processed = input->cur - input->base;
xmlBufferShrink(input->buf->buffer, processed);
input->buf->raw = input->buf->buffer;
input->buf->buffer = xmlBufferCreate();
if (ctxt->html) {
/*
* convert as much as possible of the buffer
*/
nbchars = xmlCharEncInFunc(input->buf->encoder,
input->buf->buffer,
input->buf->raw);
} else {
/*
* convert just enough to get
* '<?xml version="1.0" encoding="xxx"?>'
* parsed with the autodetected encoding
* into the parser reading buffer.
*/
nbchars = xmlCharEncFirstLine(input->buf->encoder,
input->buf->buffer,
input->buf->raw);
}
if (nbchars < 0) {
xmlErrInternal(ctxt,
"switching encoding: encoder error\n",
NULL);
return (-1);
}
input->base = input->cur = input->buf->buffer->content;
input->end = &input->base[input->buf->buffer->use];
}
return (0);
} else {
if ((input->length == 0) || (input->buf == NULL)) {
/*
* When parsing a static memory array one must know the
* size to be able to convert the buffer.
*/
xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
return (-1);
} else {
int processed;
/*
* Shrink the current input buffer.
* Move it as the raw buffer and create a new input buffer
*/
processed = input->cur - input->base;
input->buf->raw = xmlBufferCreate();
xmlBufferAdd(input->buf->raw, input->cur,
input->length - processed);
input->buf->buffer = xmlBufferCreate();
/*
* convert as much as possible of the raw input
* to the parser reading buffer.
*/
nbchars = xmlCharEncInFunc(input->buf->encoder,
input->buf->buffer,
input->buf->raw);
if (nbchars < 0) {
xmlErrInternal(ctxt,
"switching encoding: encoder error\n",
NULL);
return (-1);
}
/*
* Conversion succeeded, get rid of the old buffer
*/
if ((input->free != NULL) && (input->base != NULL))
input->free((xmlChar *) input->base);
input->base = input->cur = input->buf->buffer->content;
input->end = &input->base[input->buf->buffer->use];
}
}
return (0);
}
/**
* xmlSwitchToEncoding:
* @ctxt: the parser context
@ -1055,165 +1224,9 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
{
int nbchars;
if (handler != NULL) {
if (ctxt->input != NULL) {
if (ctxt->input->buf != NULL) {
if (ctxt->input->buf->encoder != NULL) {
/*
* Check in case the auto encoding detetection triggered
* in already.
*/
if (ctxt->input->buf->encoder == handler)
return(0);
/*
* "UTF-16" can be used for both LE and BE
if ((!xmlStrncmp(BAD_CAST ctxt->input->buf->encoder->name,
BAD_CAST "UTF-16", 6)) &&
(!xmlStrncmp(BAD_CAST handler->name,
BAD_CAST "UTF-16", 6))) {
return(0);
}
*/
/*
* Note: this is a bit dangerous, but that's what it
* takes to use nearly compatible signature for different
* encodings.
*/
xmlCharEncCloseFunc(ctxt->input->buf->encoder);
ctxt->input->buf->encoder = handler;
return(0);
}
ctxt->input->buf->encoder = handler;
/*
* Is there already some content down the pipe to convert ?
*/
if ((ctxt->input->buf->buffer != NULL) &&
(ctxt->input->buf->buffer->use > 0)) {
int processed;
/*
* Specific handling of the Byte Order Mark for
* UTF-16
*/
if ((handler->name != NULL) &&
(!strcmp(handler->name, "UTF-16LE")) &&
(ctxt->input->cur[0] == 0xFF) &&
(ctxt->input->cur[1] == 0xFE)) {
ctxt->input->cur += 2;
}
if ((handler->name != NULL) &&
(!strcmp(handler->name, "UTF-16BE")) &&
(ctxt->input->cur[0] == 0xFE) &&
(ctxt->input->cur[1] == 0xFF)) {
ctxt->input->cur += 2;
}
/*
* Errata on XML-1.0 June 20 2001
* Specific handling of the Byte Order Mark for
* UTF-8
*/
if ((handler->name != NULL) &&
(!strcmp(handler->name, "UTF-8")) &&
(ctxt->input->cur[0] == 0xEF) &&
(ctxt->input->cur[1] == 0xBB) &&
(ctxt->input->cur[2] == 0xBF)) {
ctxt->input->cur += 3;
}
/*
* Shrink the current input buffer.
* Move it as the raw buffer and create a new input buffer
*/
processed = ctxt->input->cur - ctxt->input->base;
xmlBufferShrink(ctxt->input->buf->buffer, processed);
ctxt->input->buf->raw = ctxt->input->buf->buffer;
ctxt->input->buf->buffer = xmlBufferCreate();
if (ctxt->html) {
/*
* convert as much as possible of the buffer
*/
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
ctxt->input->buf->buffer,
ctxt->input->buf->raw);
} else {
/*
* convert just enough to get
* '<?xml version="1.0" encoding="xxx"?>'
* parsed with the autodetected encoding
* into the parser reading buffer.
*/
nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
ctxt->input->buf->buffer,
ctxt->input->buf->raw);
}
if (nbchars < 0) {
xmlErrInternal(ctxt,
"xmlSwitchToEncoding: encoder error\n",
NULL);
return(-1);
}
ctxt->input->base =
ctxt->input->cur = ctxt->input->buf->buffer->content;
ctxt->input->end =
&ctxt->input->base[ctxt->input->buf->buffer->use];
}
return(0);
} else {
if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
/*
* When parsing a static memory array one must know the
* size to be able to convert the buffer.
*/
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
NULL);
return(-1);
} else {
int processed;
/*
* Shrink the current input buffer.
* Move it as the raw buffer and create a new input buffer
*/
processed = ctxt->input->cur - ctxt->input->base;
ctxt->input->buf->raw = xmlBufferCreate();
xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
ctxt->input->length - processed);
ctxt->input->buf->buffer = xmlBufferCreate();
/*
* convert as much as possible of the raw input
* to the parser reading buffer.
*/
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
ctxt->input->buf->buffer,
ctxt->input->buf->raw);
if (nbchars < 0) {
xmlErrInternal(ctxt,
"xmlSwitchToEncoding: encoder error\n",
NULL);
return(-1);
}
/*
* Conversion succeeded, get rid of the old buffer
*/
if ((ctxt->input->free != NULL) &&
(ctxt->input->base != NULL))
ctxt->input->free((xmlChar *) ctxt->input->base);
ctxt->input->base =
ctxt->input->cur = ctxt->input->buf->buffer->content;
ctxt->input->end =
&ctxt->input->base[ctxt->input->buf->buffer->use];
}
}
xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
} else {
xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
NULL);
@ -1226,7 +1239,6 @@ xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
} else
return(-1);
return(0);
}
/************************************************************************
@ -1417,7 +1429,7 @@ xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
* @ctxt: an XML parser context
* @filename: the filename to use as entity
*
* Create a new input stream based on a file.
* Create a new input stream based on a file or an URL.
*
* Returns the new input stream or NULL in case of error
*/
@ -1436,20 +1448,25 @@ xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
if (buf == NULL)
return(NULL);
URI = xmlStrdup((xmlChar *) filename);
directory = xmlParserGetDirectory((const char *) URI);
inputStream = xmlNewInputStream(ctxt);
if (inputStream == NULL) {
if (directory != NULL) xmlFree((char *) directory);
if (URI != NULL) xmlFree((char *) URI);
return(NULL);
}
inputStream->buf = buf;
inputStream = xmlCheckHTTPInput(ctxt, inputStream);
if (inputStream == NULL)
return(NULL);
if (inputStream->filename == NULL)
URI = xmlStrdup((xmlChar *) filename);
else
URI = xmlStrdup((xmlChar *) inputStream->filename);
directory = xmlParserGetDirectory((const char *) URI);
inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
if (URI != NULL) xmlFree((char *) URI);
inputStream->directory = directory;
inputStream->buf = buf;
inputStream->base = inputStream->buf->buffer->content;
inputStream->cur = inputStream->buf->buffer->content;

201
xmlIO.c
View File

@ -2961,6 +2961,80 @@ xmlParserGetDirectory(const char *filename) {
* *
****************************************************************/
/**
* xmlCheckHTTPInput:
* @ctxt: an XML parser context
* @ret: an XML parser input
*
* Check an input in case it was created from an HTTP stream, in that
* case it will handle encoding and update of the base URL in case of
* redirection. It also checks for HTTP errors in which case the input
* is cleanly freed up and an appropriate error is raised in context
*
* Returns the input or NULL in case of HTTP error.
*/
xmlParserInputPtr
xmlCheckHTTPInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr ret) {
#ifdef LIBXML_HTTP_ENABLED
if ((ret != NULL) && (ret->buf != NULL) &&
(ret->buf->readcallback == xmlIOHTTPRead) &&
(ret->buf->context != NULL)) {
const char *encoding;
const char *redir;
const char *mime;
int code;
code = xmlNanoHTTPReturnCode(ret->buf->context);
if (code >= 400) {
/* fatal error */
if (ret->filename != NULL)
xmlLoaderErr(ctxt, "failed to load HTTP resource \"%s\"\n",
(const char *) ret->filename);
else
xmlLoaderErr(ctxt, "failed to load HTTP resource\n", NULL);
xmlFreeInputStream(ret);
ret = NULL;
} else {
mime = xmlNanoHTTPMimeType(ret->buf->context);
if ((xmlStrstr(BAD_CAST mime, BAD_CAST "/xml")) ||
(xmlStrstr(BAD_CAST mime, BAD_CAST "+xml"))) {
encoding = xmlNanoHTTPEncoding(ret->buf->context);
if (encoding != NULL) {
xmlCharEncodingHandlerPtr handler;
handler = xmlFindCharEncodingHandler(encoding);
if (handler != NULL) {
xmlSwitchInputEncoding(ctxt, ret, handler);
} else {
__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
"Unknown encoding %s",
BAD_CAST encoding, NULL);
}
if (ret->encoding == NULL)
ret->encoding = xmlStrdup(BAD_CAST encoding);
}
#if 0
} else if (xmlStrstr(BAD_CAST mime, BAD_CAST "html")) {
#endif
}
redir = xmlNanoHTTPRedir(ret->buf->context);
if (redir != NULL) {
if (ret->filename != NULL)
xmlFree((xmlChar *) ret->filename);
if (ret->directory != NULL) {
xmlFree((xmlChar *) ret->directory);
ret->directory = NULL;
}
ret->filename =
(char *) xmlStrdup((const xmlChar *) redir);
}
}
}
#endif
return(ret);
}
static int xmlSysIDExists(const char *URL) {
#ifdef HAVE_STAT
int ret;
@ -3001,19 +3075,20 @@ static int xmlSysIDExists(const char *URL) {
*
* Returns a new allocated xmlParserInputPtr, or NULL.
*/
static
xmlParserInputPtr
static xmlParserInputPtr
xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
xmlParserCtxtPtr ctxt) {
xmlParserCtxtPtr ctxt)
{
xmlParserInputPtr ret = NULL;
xmlChar *resource = NULL;
#ifdef LIBXML_CATALOG_ENABLED
xmlCatalogAllow pref;
#endif
#ifdef DEBUG_EXTERNAL_ENTITIES
xmlGenericError(xmlGenericErrorContext,
"xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
"xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
#endif
#ifdef LIBXML_CATALOG_ENABLED
/*
@ -3023,87 +3098,71 @@ xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
pref = xmlCatalogGetDefaults();
if ((pref != XML_CATA_ALLOW_NONE) && (!xmlSysIDExists(URL))) {
/*
* Do a local lookup
*/
if ((ctxt->catalogs != NULL) &&
((pref == XML_CATA_ALLOW_ALL) ||
(pref == XML_CATA_ALLOW_DOCUMENT))) {
resource = xmlCatalogLocalResolve(ctxt->catalogs,
(const xmlChar *)ID,
(const xmlChar *)URL);
/*
* Do a local lookup
*/
if ((ctxt->catalogs != NULL) &&
((pref == XML_CATA_ALLOW_ALL) ||
(pref == XML_CATA_ALLOW_DOCUMENT))) {
resource = xmlCatalogLocalResolve(ctxt->catalogs,
(const xmlChar *) ID,
(const xmlChar *) URL);
}
/*
* Try a global lookup
*/
if ((resource == NULL) &&
((pref == XML_CATA_ALLOW_ALL) ||
(pref == XML_CATA_ALLOW_GLOBAL))) {
resource = xmlCatalogResolve((const xmlChar *)ID,
(const xmlChar *)URL);
}
if ((resource == NULL) && (URL != NULL))
resource = xmlStrdup((const xmlChar *) URL);
/*
* Try a global lookup
*/
if ((resource == NULL) &&
((pref == XML_CATA_ALLOW_ALL) ||
(pref == XML_CATA_ALLOW_GLOBAL))) {
resource = xmlCatalogResolve((const xmlChar *) ID,
(const xmlChar *) URL);
}
if ((resource == NULL) && (URL != NULL))
resource = xmlStrdup((const xmlChar *) URL);
/*
* TODO: do an URI lookup on the reference
*/
if ((resource != NULL) && (!xmlSysIDExists((const char *)resource))) {
xmlChar *tmp = NULL;
/*
* TODO: do an URI lookup on the reference
*/
if ((resource != NULL)
&& (!xmlSysIDExists((const char *) resource))) {
xmlChar *tmp = NULL;
if ((ctxt->catalogs != NULL) &&
((pref == XML_CATA_ALLOW_ALL) ||
(pref == XML_CATA_ALLOW_DOCUMENT))) {
tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
}
if ((tmp == NULL) &&
((pref == XML_CATA_ALLOW_ALL) ||
(pref == XML_CATA_ALLOW_GLOBAL))) {
tmp = xmlCatalogResolveURI(resource);
}
if ((ctxt->catalogs != NULL) &&
((pref == XML_CATA_ALLOW_ALL) ||
(pref == XML_CATA_ALLOW_DOCUMENT))) {
tmp = xmlCatalogLocalResolveURI(ctxt->catalogs, resource);
}
if ((tmp == NULL) &&
((pref == XML_CATA_ALLOW_ALL) ||
(pref == XML_CATA_ALLOW_GLOBAL))) {
tmp = xmlCatalogResolveURI(resource);
}
if (tmp != NULL) {
xmlFree(resource);
resource = tmp;
}
}
if (tmp != NULL) {
xmlFree(resource);
resource = tmp;
}
}
}
#endif
if (resource == NULL)
resource = (xmlChar *) URL;
resource = (xmlChar *) URL;
if (resource == NULL) {
if (ID == NULL)
ID = "NULL";
xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", ID);
return(NULL);
if (ID == NULL)
ID = "NULL";
xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n", ID);
return (NULL);
}
ret = xmlNewInputFromFile(ctxt, (const char *)resource);
ret = xmlNewInputFromFile(ctxt, (const char *) resource);
if (ret == NULL) {
xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
(const char *) resource);
return(NULL);
}
if ((ret->buf != NULL) && (ret->buf->readcallback == xmlIOHTTPRead)) {
const char *encoding;
const char *redir;
encoding = xmlNanoHTTPEncoding(ret->buf->context);
redir = xmlNanoHTTPRedir(ret->buf->context);
if (redir != NULL) {
if (ret->filename != NULL)
xmlFree((xmlChar *) ret->filename);
if (ret->directory != NULL) {
xmlFree((xmlChar *) ret->directory);
ret->directory = NULL;
}
ret->filename = (char *) xmlStrdup((const xmlChar *)redir);
}
xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
(const char *) resource);
}
if ((resource != NULL) && (resource != (xmlChar *) URL))
xmlFree(resource);
return(ret);
xmlFree(resource);
return (ret);
}
static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =