get rid of the dependency on a locally installed DTD try to cleanup the

* test/relaxng/docbook_0.xml: get rid of the dependency on a locally
  installed DTD
* uri.c include/libxml/uri.h xmlIO.c nanoftp.c nanohttp.c: try to
  cleanup the Path/URI conversion mess, needed fixing in various
  layers and a new API to the uri module which also fixes #306861
* runtest.c: integrated a regression test specific to check the
  URI conversions done before calling the I/O handlers.
Daniel
This commit is contained in:
Daniel Veillard 2005-08-07 10:46:19 +00:00
parent 29f6100e69
commit 336a8e13bf
8 changed files with 326 additions and 35 deletions

View File

@ -1,3 +1,13 @@
Sun Aug 7 12:39:35 CEST 2005 Daniel Veillard <daniel@veillard.com>
* test/relaxng/docbook_0.xml: get rid of the dependency on a locally
installed DTD
* uri.c include/libxml/uri.h xmlIO.c nanoftp.c nanohttp.c: try to
cleanup the Path/URI conversion mess, needed fixing in various
layers and a new API to the uri module which also fixes #306861
* runtest.c: integrated a regression test specific to check the
URI conversions done before calling the I/O handlers.
Sat Aug 6 11:06:24 CEST 2005 Daniel Veillard <daniel@veillard.com>
* doc/XSLT.html doc/xml.html: small doc fix for #312647

View File

@ -47,13 +47,16 @@ struct _xmlURI {
XMLPUBFUN xmlURIPtr XMLCALL
xmlCreateURI (void);
XMLPUBFUN xmlChar * XMLCALL
xmlBuildURI (const xmlChar *URI,
const xmlChar *base);
xmlBuildURI (const xmlChar *URI,
const xmlChar *base);
XMLPUBFUN xmlChar * XMLCALL
xmlBuildRelativeURI (const xmlChar *URI,
const xmlChar *base);
const xmlChar *base);
XMLPUBFUN xmlURIPtr XMLCALL
xmlParseURI (const char *str);
XMLPUBFUN xmlURIPtr XMLCALL
xmlParseURIRaw (const char *str,
int raw);
XMLPUBFUN int XMLCALL
xmlParseURIReference (xmlURIPtr uri,
const char *str);

View File

@ -316,7 +316,7 @@ xmlNanoFTPScanURL(void *ctx, const char *URL) {
}
if (URL == NULL) return;
uri = xmlParseURI(URL);
uri = xmlParseURIRaw(URL, 1);
if (uri == NULL)
return;
@ -377,7 +377,7 @@ xmlNanoFTPUpdateURL(void *ctx, const char *URL) {
if (ctxt->hostname == NULL)
return(-1);
uri = xmlParseURI(URL);
uri = xmlParseURIRaw(URL, 1);
if (uri == NULL)
return(-1);
@ -440,7 +440,7 @@ xmlNanoFTPScanProxy(const char *URL) {
#endif
if (URL == NULL) return;
uri = xmlParseURI(URL);
uri = xmlParseURIRaw(URL, 1);
if ((uri == NULL) || (uri->scheme == NULL) ||
(strcmp(uri->scheme, "ftp")) || (uri->server == NULL)) {
__xmlIOErr(XML_FROM_FTP, XML_FTP_URL_SYNTAX, "Syntax Error\n");

View File

@ -293,7 +293,7 @@ xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
}
if (URL == NULL) return;
uri = xmlParseURI(URL);
uri = xmlParseURIRaw(URL, 1);
if (uri == NULL)
return;
@ -346,7 +346,7 @@ xmlNanoHTTPScanProxy(const char *URL) {
#endif
if (URL == NULL) return;
uri = xmlParseURI(URL);
uri = xmlParseURIRaw(URL, 1);
if ((uri == NULL) || (uri->scheme == NULL) ||
(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");

180
runtest.c
View File

@ -2692,6 +2692,178 @@ uriBaseTest(const char *filename,
"http://foo.com/path/to/index.html?orig#help"));
}
/* Verdict for the URL under test: the I/O callbacks reset it to 0 on mismatch */
static int urip_success = 1;
/* Index of the entry of urip_testURLs/urip_rcvsURLs currently being checked */
static int urip_current = 0;
/*
 * Strings handed to the parser. Entry N here pairs with entry N of
 * urip_rcvsURLs, both arrays are NULL terminated.
 */
static const char *urip_testURLs[] = {
    "urip://example.com/a b.html",
    "urip://example.com/a%20b.html",
    "file:///path/to/a b.html",
    "file:///path/to/a%20b.html",
    "/path/to/a b.html",
    "/path/to/a%20b.html",
    /*
     * The two accented characters are spelled as explicit 0xE9 bytes so
     * the test does not depend on the encoding this file is saved in;
     * the expected result below is the byte by byte escaping %E9.
     */
    "urip://example.com/r" "\xe9" "sum" "\xe9" ".html",
    "urip://example.com/test?a=1&b=2%263&c=4#foo",
    NULL
};
/* Strings the I/O callbacks are expected to receive for each test URL */
static const char *urip_rcvsURLs[] = {
    /* it is a URI, the string must be escaped */
    "urip://example.com/a%20b.html",
    /* check that % escaping is not broken */
    "urip://example.com/a%20b.html",
    /* it's a URI path, the string must be escaped */
    "file:///path/to/a%20b.html",
    /* check that % escaping is not broken */
    "file:///path/to/a%20b.html",
    /* this is not a URI, this is a path, so this should not be escaped */
    "/path/to/a b.html",
    /* check that paths with % are not broken */
    "/path/to/a%20b.html",
    /* out of context the encoding can't be guessed: byte by byte conversion */
    "urip://example.com/r%E9sum%E9.html",
    /* verify we don't destroy URIs, especially the query part */
    "urip://example.com/test?a=1&b=2%263&c=4#foo",
    NULL
};
/* Document served by the urip handler for every request */
static const char *urip_res = "<list/>";
/* Payload of the currently opened urip resource, NULL when closed */
static const char *urip_cur = NULL;
/* Number of payload bytes not yet delivered by uripRead */
static int urip_rlen;
/**
 * uripMatch:
 * @URI: the resource identifier handed over by the I/O layer
 *
 * Match callback of the urip test handler. Besides claiming the
 * resource it compares @URI with the string expected for the test
 * currently running and clears urip_success when they differ.
 *
 * Returns 0 for a NULL @URI or the system catalog, 1 otherwise
 */
static int
uripMatch(const char * URI) {
    if (URI == NULL)
        return(0);
    /* the catalog lookup is left to the regular handlers */
    if (strcmp(URI, "file:///etc/xml/catalog") == 0)
        return(0);

    /* anything but the expected escaped form fails the current test */
    if (strcmp(urip_rcvsURLs[urip_current], URI) != 0)
        urip_success = 0;

    return(1);
}
/**
 * uripOpen:
 * @URI: the resource identifier handed over by the I/O layer
 *
 * Open callback of the urip test handler. It checks @URI against the
 * string expected for the running test (clearing urip_success on a
 * mismatch), then makes the canned document urip_res the current
 * payload and records its length in urip_rlen.
 *
 * Returns the payload pointer used as Input context, or NULL for a
 *         NULL @URI or the system catalog
 */
static void *
uripOpen(const char * URI) {
    if (URI == NULL)
        return(NULL);
    if (strcmp(URI, "file:///etc/xml/catalog") == 0)
        return(NULL);

    /* Verify we received the escaped URL */
    if (strcmp(urip_rcvsURLs[urip_current], URI) != 0)
        urip_success = 0;

    urip_rlen = strlen(urip_res);
    urip_cur = urip_res;
    return((void *) urip_cur);
}
/**
 * uripClose:
 * @context: the read context returned by uripOpen
 *
 * Close callback of the urip test handler: forgets the current payload
 * and zeroes the count of bytes left to read.
 *
 * Returns 0 on success, -1 if @context is NULL
 */
static int
uripClose(void * context) {
    if (context != NULL) {
        urip_rlen = 0;
        urip_cur = NULL;
        return(0);
    }
    return(-1);
}
/**
 * uripRead:
 * @context: the read context
 * @buffer: where to store data
 * @len: number of bytes to read
 *
 * Read callback of the urip test handler. Copies at most @len of the
 * bytes still pending (urip_rlen) into @buffer and decreases urip_rlen
 * accordingly. Successive calls continue where the previous one
 * stopped, so a caller reading with a small buffer gets the payload
 * in order instead of its beginning over and over.
 *
 * Assumes @context is the NUL terminated payload returned by uripOpen.
 *
 * Returns the number of bytes read or -1 in case of error
 */
static int
uripRead(void * context, char * buffer, int len) {
    const char *ptr = (const char *) context;
    int consumed;

    if ((context == NULL) || (buffer == NULL) || (len < 0))
        return(-1);

    /* bytes already delivered = payload size minus what is still pending */
    consumed = (int) strlen(ptr) - urip_rlen;
    if (consumed < 0)
        return(-1);

    if (len > urip_rlen) len = urip_rlen;
    memcpy(buffer, ptr + consumed, len);
    urip_rlen -= len;
    return(len);
}
/**
 * urip_checkURL:
 * @URL: the URL or path to load
 *
 * Try to load @URL with xmlReadFile() and discard the resulting document.
 *
 * Returns 1 if a document was built, -1 otherwise
 */
static int
urip_checkURL(const char *URL) {
    xmlDocPtr parsed = xmlReadFile(URL, NULL, 0);

    if (parsed != NULL) {
        xmlFreeDoc(parsed);
        return(1);
    }
    return(-1);
}
/**
 * uriPathTest:
 * @filename: ignored
 * @result: ignored
 * @err: ignored
 * @options: ignored
 *
 * Run a set of tests to check how Path and URI are handled before
 * being passed to the I/O layer. The urip callbacks are registered
 * for the duration of the run, every entry of urip_testURLs is loaded
 * and the string seen by the callbacks is compared with the matching
 * entry of urip_rcvsURLs. nb_tests is incremented once per URL.
 *
 * Returns 0 in case of success, the number of failed URLs otherwise,
 *         or -1 if the callbacks could not be registered
 */
static int
uriPathTest(const char *filename ATTRIBUTE_UNUSED,
            const char *result ATTRIBUTE_UNUSED,
            const char *err ATTRIBUTE_UNUSED,
            int options ATTRIBUTE_UNUSED) {
    int parsed;
    int failures = 0;

    /*
     * register the new I/O handlers
     */
    if (xmlRegisterInputCallbacks(uripMatch, uripOpen, uripRead, uripClose) < 0)
    {
        fprintf(stderr, "failed to register urip handler\n");
        return(-1);
    }

    for (urip_current = 0;urip_testURLs[urip_current] != NULL;urip_current++) {
        /* the callbacks clear this flag when they see an unexpected string */
        urip_success = 1;
        parsed = urip_checkURL(urip_testURLs[urip_current]);
        if (urip_success != 1) {
            fprintf(stderr, "failed the URL passing test for %s\n",
                    urip_testURLs[urip_current]);
            failures++;
        } else if (parsed != 1) {
            fprintf(stderr, "failed the parsing test for %s\n",
                    urip_testURLs[urip_current]);
            failures++;
        }
        nb_tests++;
    }

    /* drop the handlers registered above */
    xmlPopInputCallbacks();
    return(failures);
}
#ifdef LIBXML_SCHEMAS_ENABLED
/************************************************************************
* *
@ -4039,6 +4211,9 @@ testDesc testDescriptions[] = {
{ "URI base composition tests" ,
uriBaseTest, "./test/URI/*.data", "result/URI/", "", NULL,
0 },
{ "Path URI conversion tests" ,
uriPathTest, NULL, NULL, NULL, NULL,
0 },
#ifdef LIBXML_SCHEMAS_ENABLED
{ "Schemas regression tests" ,
schemasTest, "./test/schemas/*_*.xsd", NULL, NULL, NULL,
@ -4170,6 +4345,7 @@ launchTests(testDescPtr tst) {
}
static int verbose = 0;
static int tests_quiet = 0;
static int
runtest(int i) {
@ -4179,7 +4355,7 @@ runtest(int i) {
old_errors = nb_errors;
old_tests = nb_tests;
old_leaks = nb_leaks;
if (testDescriptions[i].desc != NULL)
if ((tests_quiet == 0) && (testDescriptions[i].desc != NULL))
printf("## %s\n", testDescriptions[i].desc);
res = launchTests(&testDescriptions[i]);
if (res != 0)
@ -4207,6 +4383,8 @@ main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
for (a = 1; a < argc;a++) {
if (!strcmp(argv[a], "-v"))
verbose = 1;
else if (!strcmp(argv[a], "-quiet"))
tests_quiet = 1;
else {
for (i = 0; testDescriptions[i].func != NULL; i++) {
if (strstr(testDescriptions[i].desc, argv[a])) {

View File

@ -1,6 +1,5 @@
<?xml version="1.0"?>
<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
"../dtd/4.1.2/docbookx.dtd" [
<!DOCTYPE article [
<!ENTITY version "1.0.53">
<!ENTITY mdash "--">
<!ENTITY hellip "...">

148
uri.c
View File

@ -185,6 +185,8 @@
* path = [ abs_path | opaque_part ]
*/
/*
 * Duplicate the first n bytes of s through xmlStrndup() and hand the copy
 * back as a char *. The expansion is fully parenthesized so the cast
 * applies to the call result whatever operator follows the macro.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
/************************************************************************
* *
* Generic URI structure functions *
@ -1086,7 +1088,10 @@ xmlParseURIFragment(xmlURIPtr uri, const char **str)
if (uri != NULL) {
if (uri->fragment != NULL)
xmlFree(uri->fragment);
uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
if (uri->cleanup & 2)
uri->fragment = STRNDUP(*str, cur - *str);
else
uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return (0);
@ -1111,12 +1116,16 @@ xmlParseURIQuery(xmlURIPtr uri, const char **str)
if (str == NULL)
return (-1);
while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
while ((IS_URIC(cur)) ||
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
if (uri != NULL) {
if (uri->query != NULL)
xmlFree(uri->query);
uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
if (uri->cleanup & 2)
uri->query = STRNDUP(*str, cur - *str);
else
uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return (0);
@ -1147,8 +1156,7 @@ xmlParseURIScheme(xmlURIPtr uri, const char **str) {
while (IS_SCHEME(*cur)) cur++;
if (uri != NULL) {
if (uri->scheme != NULL) xmlFree(uri->scheme);
/* !!! strndup */
uri->scheme = xmlURIUnescapeString(*str, cur - *str, NULL);
uri->scheme = STRNDUP(*str, cur - *str);
}
*str = cur;
return(0);
@ -1174,16 +1182,21 @@ xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
return (-1);
cur = *str;
if (!(IS_URIC_NO_SLASH(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
if (!((IS_URIC_NO_SLASH(cur)) ||
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
return (3);
}
NEXT(cur);
while (IS_URIC(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
while ((IS_URIC(cur)) ||
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
if (uri != NULL) {
if (uri->opaque != NULL)
xmlFree(uri->opaque);
uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
if (uri->cleanup & 2)
uri->opaque = STRNDUP(*str, cur - *str);
else
uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return (0);
@ -1235,7 +1248,10 @@ xmlParseURIServer(xmlURIPtr uri, const char **str) {
if (*cur == '@') {
if (uri != NULL) {
if (uri->user != NULL) xmlFree(uri->user);
uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
/*
 * Raw mode (cleanup bit 2) keeps the user info escaped. It must land in
 * uri->user like the unescaped variant: uri->user was just freed above,
 * and storing into uri->path would both clobber the path and leave
 * uri->user pointing to released memory.
 */
if (uri->cleanup & 2)
    uri->user = STRNDUP(*str, cur - *str);
else
    uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
}
cur++;
} else {
@ -1349,7 +1365,10 @@ xmlParseURIServer(xmlURIPtr uri, const char **str) {
uri->authority = NULL;
if (host[0] != '[') { /* it's not an IPV6 addr */
if (uri->server != NULL) xmlFree(uri->server);
uri->server = xmlURIUnescapeString(host, cur - host, NULL);
if (uri->cleanup & 2)
uri->server = STRNDUP(host, cur - host);
else
uri->server = xmlURIUnescapeString(host, cur - host, NULL);
}
}
/*
@ -1392,16 +1411,21 @@ xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
return (-1);
cur = *str;
if (!(IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))) {
if (!((IS_SEGMENT(cur)) ||
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
return (3);
}
NEXT(cur);
while (IS_SEGMENT(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
while ((IS_SEGMENT(cur)) ||
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
if (uri != NULL) {
if (uri->path != NULL)
xmlFree(uri->path);
uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
if (uri->cleanup & 2)
uri->path = STRNDUP(*str, cur - *str);
else
uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return (0);
@ -1432,11 +1456,13 @@ xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
cur = *str;
do {
while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
while ((IS_PCHAR(cur)) ||
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
while (*cur == ';') {
cur++;
while (IS_PCHAR(cur) || ((uri != NULL) && (uri->cleanup) && (IS_UNWISE(cur))))
while ((IS_PCHAR(cur)) ||
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
NEXT(cur);
}
if (*cur != '/')
@ -1472,8 +1498,13 @@ xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
len2++;
}
path[len2] = 0;
if (cur - *str > 0)
xmlURIUnescapeString(*str, cur - *str, &path[len2]);
if (cur - *str > 0) {
if (uri->cleanup & 2) {
memcpy(&path[len2], *str, cur - *str);
path[len2 + (cur - *str)] = 0;
} else
xmlURIUnescapeString(*str, cur - *str, &path[len2]);
}
if (uri->path != NULL)
xmlFree(uri->path);
uri->path = path;
@ -1538,7 +1569,10 @@ xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
if (uri->user != NULL) xmlFree(uri->user);
uri->user = NULL;
if (uri->authority != NULL) xmlFree(uri->authority);
uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
if (uri->cleanup & 2)
uri->authority = STRNDUP(*str, cur - *str);
else
uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
}
*str = cur;
return(0);
@ -1761,6 +1795,38 @@ xmlParseURI(const char *str) {
return(uri);
}
/**
 * xmlParseURIRaw:
 * @str: the URI string to analyze
 * @raw: if non-zero, unescaping of the URI pieces is disabled
 *
 * Parse a URI reference, optionally keeping the pieces exactly as they
 * appear in @str. When @raw is set, flag 2 is added to the cleanup
 * field of the new URI before xmlParseURIReference() is called.
 *
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *
 * Returns a newly built xmlURIPtr or NULL in case of error
 */
xmlURIPtr
xmlParseURIRaw(const char *str, int raw) {
    xmlURIPtr parsed;

    if (str == NULL)
        return(NULL);

    parsed = xmlCreateURI();
    if (parsed == NULL)
        return(NULL);

    if (raw != 0)
        parsed->cleanup |= 2;

    if (xmlParseURIReference(parsed, str) != 0) {
        xmlFreeURI(parsed);
        return(NULL);
    }
    return(parsed);
}
/************************************************************************
* *
* Public functions *
@ -2229,8 +2295,9 @@ xmlCanonicPath(const xmlChar *path)
int i = 0;
xmlChar *p = NULL;
#endif
xmlChar *ret;
xmlURIPtr uri;
xmlChar *ret;
const xmlChar *absuri;
if (path == NULL)
return(NULL);
@ -2239,12 +2306,47 @@ xmlCanonicPath(const xmlChar *path)
return xmlStrdup(path);
}
/* Does the string contain a "scheme://" style separator ? */
absuri = xmlStrstr(path, BAD_CAST "://");
if (absuri != NULL) {
    int l, j;
    unsigned char c;
    xmlChar *escURI;

    /*
     * this looks like a URI where some parts have not been
     * escaped, leading to a parsing problem; check that the first
     * part matches a protocol.
     */
    l = absuri - path;
    /* a scheme is 1 to 20 characters long here ... */
    if ((l <= 0) || (l > 20))
        goto path_processing;
    /* ... and made of ASCII letters only */
    for (j = 0;j < l;j++) {
        c = path[j];
        if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
            goto path_processing;
    }

    /* escape everything but the listed URI delimiters and retry parsing */
    escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
    if (escURI != NULL) {
        uri = xmlParseURI((const char *) escURI);
        if (uri != NULL) {
            /* the escaped form parses: return it, the caller owns it */
            xmlFreeURI(uri);
            return escURI;
        }
        /*
         * Parsing failed, so uri is NULL and there is nothing to free
         * there; the escaped copy is what must be released before
         * falling back to the path processing.
         */
        xmlFree(escURI);
    }
}
path_processing:
#if defined(_WIN32) && !defined(__CYGWIN__)
/*
* This really need to be cleaned up by someone with a Windows box
*/
uri = xmlCreateURI();
if (uri == NULL) {
return(NULL);
}
#if defined(_WIN32) && !defined(__CYGWIN__)
len = xmlStrlen(path);
if ((len > 2) && IS_WINDOWS_PATH(path)) {
uri->scheme = xmlStrdup(BAD_CAST "file");
@ -2261,15 +2363,15 @@ xmlCanonicPath(const xmlChar *path)
*p = '/';
p++;
}
#else
uri->path = (char *) xmlStrdup((const xmlChar *) path);
#endif
if (uri->path == NULL) {
xmlFreeURI(uri);
return(NULL);
}
ret = xmlSaveUri(uri);
xmlFreeURI(uri);
#else
ret = xmlStrdup((const xmlChar *) path);
#endif
return(ret);
}

View File

@ -3533,7 +3533,6 @@ xmlGetExternalEntityLoader(void) {
*
* Load an external entity, note that the use of this function for
* unparsed entities may generate problems
* TODO: a more generic External entity API must be designed
*
* Returns the xmlParserInputPtr or NULL
*/