From a8d8a70c510a79a9850e536edc6838b244acb2ef Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer
Date: Mon, 27 Jan 2025 13:31:08 +0100
Subject: [PATCH] uri: Fix handling of Windows drive letters

Allow drive letters in URI paths. Technically, these should be treated
as URI schemes, but this is not what users expect.

This also makes sure that paths with drive letters are resolved as
filesystem paths and unescaped, for example when used in libxslt's
document() function.

Should fix #832.
---
Notes (below the three-dash line, so not part of the commit message):

 * testparser.c: adds testWindowsUri(), compiled only when _WIN32 or
   __CYGWIN__ is defined, and calls it from main() under the same guard.
 * uri.c, xmlParse3986Scheme(): under that guard, returns 1 when the
   leading letter is immediately followed by ':'.
 * uri.c, xmlParse3986Segment(): a first character equal to `forbid` is
   now handled like a non-pchar first character (on all platforms); under
   the guard, a ':' in second position is consumed when forbid is ':'.
 * uri.c, xmlBuildURISafe(): under the guard, a reference whose parsed
   path starts with an ASCII letter followed by ':' is passed to
   xmlResolvePath().
 * Line breaks and indentation in this copy were reconstructed from a
   whitespace-collapsed original; blank lines were placed so that every
   hunk matches its @@ line counts. Context-line whitespace may differ
   from the tree, so apply with whitespace tolerance (for example
   `git apply --ignore-whitespace`) -- TODO confirm against upstream.
 * NOTE(review): in testWindowsUri() the "invalid path" message passes
   uri->path to %s on a branch that is also taken when it is NULL.

 testparser.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 uri.c        | 38 ++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/testparser.c b/testparser.c
index c28e8bbf..6dfb7ea8 100644
--- a/testparser.c
+++ b/testparser.c
@@ -789,6 +789,81 @@ testBuildRelativeUri(void) {
     return err;
 }
 
+#if defined(_WIN32) || defined(__CYGWIN__)
+static int
+testWindowsUri(void) {
+    const char *url = "c:/a%20b/file.txt";
+    xmlURIPtr uri;
+    xmlChar *res;
+    int err = 0;
+    int i;
+
+    static const xmlRelativeUriTest tests[] = {
+        {
+            "c:/a%20b/file.txt",
+            "base.xml",
+            "c:/a b/file.txt"
+        }, {
+            "file:///c:/a%20b/file.txt",
+            "base.xml",
+            "file:///c:/a%20b/file.txt"
+        }, {
+            "Z:/a%20b/file.txt",
+            "http://example.com/",
+            "Z:/a b/file.txt"
+        }, {
+            "a%20b/b1/c1",
+            "C:/a/b2/c2",
+            "C:/a/b2/a b/b1/c1"
+        }, {
+            "a%20b/b1/c1",
+            "\\a\\b2\\c2",
+            "/a/b2/a b/b1/c1"
+        }, {
+            "a%20b/b1/c1",
+            "\\\\?\\a\\b2\\c2",
+            "//?/a/b2/a b/b1/c1"
+        }, {
+            "a%20b/b1/c1",
+            "\\\\\\\\server\\b2\\c2",
+            "//server/b2/a b/b1/c1"
+        }
+    };
+
+    uri = xmlParseURI(url);
+    if (uri == NULL) {
+        fprintf(stderr, "xmlParseURI failed\n");
+        err = 1;
+    } else {
+        if (uri->scheme != NULL) {
+            fprintf(stderr, "invalid scheme: %s\n", uri->scheme);
+            err = 1;
+        }
+        if (uri->path == NULL || strcmp(uri->path, "c:/a b/file.txt") != 0) {
+            fprintf(stderr, "invalid path: %s\n", uri->path);
+            err = 1;
+        }
+
+        xmlFreeURI(uri);
+    }
+
+    for (i = 0; (size_t) i < sizeof(tests) / sizeof(tests[0]); i++) {
+        const xmlRelativeUriTest *test = tests + i;
+
+        res = xmlBuildURI(BAD_CAST test->uri, BAD_CAST test->base);
+        if (res == NULL || !xmlStrEqual(res, BAD_CAST test->result)) {
+            fprintf(stderr, "xmlBuildURI failed uri=%s base=%s "
+                    "result=%s expected=%s\n", test->uri, test->base,
+                    res, test->result);
+            err = 1;
+        }
+        xmlFree(res);
+    }
+
+    return err;
+}
+#endif /* WIN32 */
+
 static int charEncConvImplError;
 
 static int
@@ -913,6 +988,9 @@ main(void) {
     err |= testWriterClose();
 #endif
     err |= testBuildRelativeUri();
+#if defined(_WIN32) || defined(__CYGWIN__)
+    err |= testWindowsUri();
+#endif
     err |= testCharEncConvImpl();
 
     return err;
diff --git a/uri.c b/uri.c
index 6f2db649..a94acb4d 100644
--- a/uri.c
+++ b/uri.c
@@ -232,6 +232,15 @@ xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
     if (!ISA_ALPHA(cur))
         return(1);
     cur++;
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+    /*
+     * Don't treat Windows drive letters as scheme.
+     */
+    if (*cur == ':')
+        return(1);
+#endif
+
     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
     if (uri != NULL) {
@@ -583,11 +592,21 @@ xmlParse3986Segment(xmlURIPtr uri, const char **str, char forbid, int empty)
     const char *cur;
 
     cur = *str;
-    if (!ISA_PCHAR(uri, cur)) {
+    if (!ISA_PCHAR(uri, cur) || (*cur == forbid)) {
         if (empty)
             return(0);
         return(1);
     }
+    NEXT(cur);
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+    /*
+     * Allow Windows drive letters.
+     */
+    if ((forbid == ':') && (*cur == forbid))
+        NEXT(cur);
+#endif
+
     while (ISA_PCHAR(uri, cur) && (*cur != forbid))
         NEXT(cur);
     *str = cur;
@@ -2070,6 +2089,23 @@ xmlBuildURISafe(const xmlChar *URI, const xmlChar *base, xmlChar **valPtr) {
         return(xmlResolvePath(URI, base, valPtr));
     }
 
+#if defined(_WIN32) || defined(__CYGWIN__)
+    /*
+     * Resolve paths with a Windows drive letter as filesystem path
+     * even if base has a scheme.
+     */
+    if ((ref != NULL) && (ref->path != NULL)) {
+        int c = ref->path[0];
+
+        if ((((c >= 'A') && (c <= 'Z')) ||
+             ((c >= 'a') && (c <= 'z'))) &&
+            (ref->path[1] == ':')) {
+            xmlFreeURI(ref);
+            return(xmlResolvePath(URI, base, valPtr));
+        }
+    }
+#endif
+
     ret = xmlParseURISafe((const char *) base, &bas);
     if (ret < 0)
         goto done;