1
0
mirror of https://github.com/libuv/libuv synced 2025-03-28 21:13:16 +00:00

unix,win: support IDNA 2008 in uv_getaddrinfo()

Encode domain names before passing them on to the libc resolver.
Some getaddrinfo() implementations support IDNA 2008, some only
IDNA 2003 and some don't support i18n domain names at all.

This is a potential security issue because it means a domain name
might resolve differently depending on the system that libuv is
running on.

Fixes: https://github.com/libuv/libuv/issues/2028
PR-URL: https://github.com/libuv/libuv/pull/2046
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com>
This commit is contained in:
Ben Noordhuis 2018-10-20 01:28:16 +02:00
parent 143da93e2d
commit 6dd44caa35
10 changed files with 566 additions and 2 deletions

View File

@ -13,6 +13,7 @@ endif()
set(uv_sources
src/fs-poll.c
src/idna.c
src/inet.c
src/threadpool.c
src/timer.c
@ -64,6 +65,7 @@ set(uv_test_sources
test/test-homedir.c
test/test-hrtime.c
test/test-idle.c
test/test-idna.c
test/test-ip4-addr.c
test/test-ip6-addr.c
test/test-ip6-addr.c

View File

@ -29,6 +29,7 @@ libuv_la_CFLAGS = @CFLAGS@
libuv_la_LDFLAGS = -no-undefined -version-info 1:0:0
libuv_la_SOURCES = src/fs-poll.c \
src/heap-inl.h \
src/idna.c \
src/inet.c \
src/queue.h \
src/threadpool.c \
@ -189,6 +190,7 @@ test_run_tests_SOURCES = test/blackhole-server.c \
test/test-homedir.c \
test/test-hrtime.c \
test/test-idle.c \
test/test-idna.c \
test/test-ip4-addr.c \
test/test-ip6-addr.c \
test/test-ipc-heavy-traffic-deadlock-bug.c \

291
src/idna.c Normal file
View File

@ -0,0 +1,291 @@
/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* Derived from https://github.com/bnoordhuis/punycode
* but updated to support IDNA 2008.
*/
#include "uv.h"
#include "idna.h"
#include <string.h>
static unsigned uv__utf8_decode1_slow(const char** p,
const char* pe,
unsigned a) {
unsigned b;
unsigned c;
unsigned d;
unsigned min;
if (a > 0xF7)
return -1;
switch (*p - pe) {
default:
if (a > 0xEF) {
min = 0x10000;
a = a & 7;
b = (unsigned char) *(*p)++;
c = (unsigned char) *(*p)++;
d = (unsigned char) *(*p)++;
break;
}
/* Fall through. */
case 2:
if (a > 0xDF) {
min = 0x800;
b = 0x80 | (a & 15);
c = (unsigned char) *(*p)++;
d = (unsigned char) *(*p)++;
a = 0;
break;
}
/* Fall through. */
case 1:
if (a > 0xBF) {
min = 0x80;
b = 0x80;
c = 0x80 | (a & 31);
d = (unsigned char) *(*p)++;
a = 0;
break;
}
return -1; /* Invalid continuation byte. */
}
if (0x80 != (0xC0 & (b ^ c ^ d)))
return -1; /* Invalid sequence. */
b &= 63;
c &= 63;
d &= 63;
a = (a << 18) | (b << 12) | (c << 6) | d;
if (a < min)
return -1; /* Overlong sequence. */
if (a > 0x10FFFF)
return -1; /* Four-byte sequence > U+10FFFF. */
if (a >= 0xD800 && a <= 0xDFFF)
return -1; /* Surrogate pair. */
return a;
}
unsigned uv__utf8_decode1(const char** p, const char* pe) {
unsigned a;
a = (unsigned char) *(*p)++;
if (a < 128)
return a; /* ASCII, common case. */
return uv__utf8_decode1_slow(p, pe, a);
}
#define foreach_codepoint(c, p, pe) \
for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
static int uv__idna_toascii_label(const char* s, const char* se,
char** d, char* de) {
static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
const char* ss;
unsigned c;
unsigned h;
unsigned k;
unsigned n;
unsigned m;
unsigned q;
unsigned t;
unsigned x;
unsigned y;
unsigned bias;
unsigned delta;
unsigned todo;
int first;
h = 0;
ss = s;
todo = 0;
foreach_codepoint(c, &s, se) {
if (c < 128)
h++;
else if (c == (unsigned) -1)
return UV_EINVAL;
else
todo++;
}
if (todo > 0) {
if (*d < de) *(*d)++ = 'x';
if (*d < de) *(*d)++ = 'n';
if (*d < de) *(*d)++ = '-';
if (*d < de) *(*d)++ = '-';
}
x = 0;
s = ss;
foreach_codepoint(c, &s, se) {
if (c > 127)
continue;
if (*d < de)
*(*d)++ = c;
if (++x == h)
break; /* Visited all ASCII characters. */
}
if (todo == 0)
return h;
/* Only write separator when we've written ASCII characters first. */
if (h > 0)
if (*d < de)
*(*d)++ = '-';
n = 128;
bias = 72;
delta = 0;
first = 1;
while (todo > 0) {
m = -1;
s = ss;
foreach_codepoint(c, &s, se)
if (c >= n)
if (c < m)
m = c;
x = m - n;
y = h + 1;
if (x > ~delta / y)
return UV_E2BIG; /* Overflow. */
delta += x * y;
n = m;
s = ss;
foreach_codepoint(c, &s, se) {
if (c < n)
if (++delta == 0)
return UV_E2BIG; /* Overflow. */
if (c != n)
continue;
for (k = 36, q = delta; /* empty */; k += 36) {
t = 1;
if (k > bias)
t = k - bias;
if (t > 26)
t = 26;
if (q < t)
break;
/* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
* 10 <= y <= 35, we can optimize the long division
* into a table-based reciprocal multiplication.
*/
x = q - t;
y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */
q = x / y;
t = t + x % y; /* 1 <= t <= 35 because of y. */
if (*d < de)
*(*d)++ = alphabet[t];
}
if (*d < de)
*(*d)++ = alphabet[q];
delta /= 2;
if (first) {
delta /= 350;
first = 0;
}
/* No overflow check is needed because |delta| was just
* divided by 2 and |delta+delta >= delta + delta/h|.
*/
h++;
delta += delta / h;
for (bias = 0; delta > 35 * 26 / 2; bias += 36)
delta /= 35;
bias += 36 * delta / (delta + 38);
delta = 0;
todo--;
}
delta++;
n++;
}
return 0;
}
#undef foreach_codepoint
long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
const char* si;
const char* st;
unsigned c;
char* ds;
int rc;
ds = d;
for (si = s; si < se; /* empty */) {
st = si;
c = uv__utf8_decode1(&si, se);
if (c != '.')
if (c != 0x3002) /* 。 */
if (c != 0xFF0E) /* */
if (c != 0xFF61) /* 。 */
continue;
rc = uv__idna_toascii_label(s, st, &d, de);
if (rc < 0)
return rc;
if (d < de)
*d++ = '.';
s = si;
}
if (s < se) {
rc = uv__idna_toascii_label(s, se, &d, de);
if (rc < 0)
return rc;
}
if (d < de)
*d++ = '\0';
return d - ds; /* Number of bytes written. */
}

31
src/idna.h Normal file
View File

@ -0,0 +1,31 @@
/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef UV_SRC_IDNA_H_
#define UV_SRC_IDNA_H_
/* Decode a single codepoint. Returns the codepoint or UINT32_MAX on error.
* |p| is updated on success _and_ error, i.e., bad multi-byte sequences are
* skipped in their entirety, not just the first bad byte.
*/
unsigned uv__utf8_decode1(const char** p, const char* pe);
/* Convert a UTF-8 domain name to IDNA 2008 / Punycode. A return value >= 0
* is the number of bytes written to |d|, including the trailing nul byte.
* A return value < 0 is a libuv error code. |s| and |d| can not overlap.
*/
long uv__idna_toascii(const char* s, const char* se, char* d, char* de);
#endif /* UV_SRC_IDNA_H_ */

View File

@ -27,6 +27,7 @@
#include "uv.h"
#include "internal.h"
#include "idna.h"
#include <errno.h>
#include <stddef.h> /* NULL */
@ -141,15 +142,34 @@ int uv_getaddrinfo(uv_loop_t* loop,
const char* hostname,
const char* service,
const struct addrinfo* hints) {
char hostname_ascii[256];
size_t hostname_len;
size_t service_len;
size_t hints_len;
size_t len;
char* buf;
long rc;
if (req == NULL || (hostname == NULL && service == NULL))
return UV_EINVAL;
/* FIXME(bnoordhuis) IDNA does not seem to work z/OS,
* probably because it uses EBCDIC rather than ASCII.
*/
#ifdef __MVS__
(void) &hostname_ascii;
#else
if (hostname != NULL) {
rc = uv__idna_toascii(hostname,
hostname + strlen(hostname),
hostname_ascii,
hostname_ascii + sizeof(hostname_ascii));
if (rc < 0)
return rc;
hostname = hostname_ascii;
}
#endif
hostname_len = hostname ? strlen(hostname) + 1 : 0;
service_len = service ? strlen(service) + 1 : 0;
hints_len = hints ? sizeof(*hints) : 0;

View File

@ -24,6 +24,7 @@
#include "uv.h"
#include "internal.h"
#include "req-inl.h"
#include "idna.h"
/* EAI_* constants. */
#include <winsock2.h>
@ -259,11 +260,13 @@ int uv_getaddrinfo(uv_loop_t* loop,
const char* node,
const char* service,
const struct addrinfo* hints) {
char hostname_ascii[256];
int nodesize = 0;
int servicesize = 0;
int hintssize = 0;
char* alloc_ptr = NULL;
int err;
long rc;
if (req == NULL || (node == NULL && service == NULL)) {
return UV_EINVAL;
@ -277,12 +280,19 @@ int uv_getaddrinfo(uv_loop_t* loop,
/* calculate required memory size for all input values */
if (node != NULL) {
nodesize = ALIGNED_SIZE(MultiByteToWideChar(CP_UTF8, 0, node, -1, NULL, 0) *
sizeof(WCHAR));
rc = uv__idna_toascii(node,
node + strlen(node),
hostname_ascii,
hostname_ascii + sizeof(hostname_ascii));
if (rc < 0)
return rc;
nodesize = ALIGNED_SIZE(MultiByteToWideChar(CP_UTF8, 0, hostname_ascii,
-1, NULL, 0) * sizeof(WCHAR));
if (nodesize == 0) {
err = GetLastError();
goto error;
}
node = hostname_ascii;
}
if (service != NULL) {

195
test/test-idna.c Normal file
View File

@ -0,0 +1,195 @@
/* Copyright The libuv project and contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "task.h"
#include "../src/idna.c"
#include <string.h>
TEST_IMPL(utf8_decode1) {
const char* p;
char b[32];
int i;
/* ASCII. */
p = b;
snprintf(b, sizeof(b), "%c\x7F", 0x00);
ASSERT(0 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 1);
ASSERT(127 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 2);
/* Two-byte sequences. */
p = b;
snprintf(b, sizeof(b), "\xC2\x80\xDF\xBF");
ASSERT(128 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 2);
ASSERT(0x7FF == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 4);
/* Three-byte sequences. */
p = b;
snprintf(b, sizeof(b), "\xE0\xA0\x80\xEF\xBF\xBF");
ASSERT(0x800 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 3);
ASSERT(0xFFFF == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 6);
/* Four-byte sequences. */
p = b;
snprintf(b, sizeof(b), "\xF0\x90\x80\x80\xF4\x8F\xBF\xBF");
ASSERT(0x10000 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 4);
ASSERT(0x10FFFF == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 8);
/* Four-byte sequences > U+10FFFF; disallowed. */
p = b;
snprintf(b, sizeof(b), "\xF4\x90\xC0\xC0\xF7\xBF\xBF\xBF");
ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 4);
ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 8);
/* Overlong; disallowed. */
p = b;
snprintf(b, sizeof(b), "\xC0\x80\xC1\x80");
ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 2);
ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 4);
/* Surrogate pairs; disallowed. */
p = b;
snprintf(b, sizeof(b), "\xED\xA0\x80\xED\xA3\xBF");
ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 3);
ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + 6);
/* Simply illegal. */
p = b;
snprintf(b, sizeof(b), "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
for (i = 1; i <= 8; i++) {
ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
ASSERT(p == b + i);
}
return 0;
}
/* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
#ifndef __MVS__
#define F(input, err) \
do { \
char d[256] = {0}; \
static const char s[] = "" input ""; \
ASSERT(err == uv__idna_toascii(s, s + sizeof(s) - 1, d, d + sizeof(d))); \
} while (0)
#define T(input, expected) \
do { \
long n; \
char d1[256] = {0}; \
char d2[256] = {0}; \
static const char s[] = "" input ""; \
n = uv__idna_toascii(s, s + sizeof(s) - 1, d1, d1 + sizeof(d1)); \
ASSERT(n == sizeof(expected)); \
ASSERT(0 == memcmp(d1, expected, n)); \
/* Sanity check: encoding twice should not change the output. */ \
n = uv__idna_toascii(d1, d1 + strlen(d1), d2, d2 + sizeof(d2)); \
ASSERT(n == sizeof(expected)); \
ASSERT(0 == memcmp(d2, expected, n)); \
ASSERT(0 == memcmp(d1, d2, sizeof(d2))); \
} while (0)
TEST_IMPL(idna_toascii) {
/* Illegal inputs. */
F("\xC0\x80\xC1\x80", UV_EINVAL); /* Overlong UTF-8 sequence. */
F("\xC0\x80\xC1\x80.com", UV_EINVAL); /* Overlong UTF-8 sequence. */
/* No conversion. */
T("", "");
T(".", ".");
T(".com", ".com");
T("example", "example");
T("example-", "example-");
T("straße.de", "xn--strae-oqa.de");
/* Test cases adapted from punycode.js. Most are from RFC 3492. */
T("foo.bar", "foo.bar");
T("mañana.com", "xn--maana-pta.com");
T("example.com.", "example.com.");
T("bücher.com", "xn--bcher-kva.com");
T("café.com", "xn--caf-dma.com");
T("café.café.com", "xn--caf-dma.xn--caf-dma.com");
T("☃-⌘.com", "xn----dqo34k.com");
T("퐀☃-⌘.com", "xn----dqo34kn65z.com");
T("💩.la", "xn--ls8h.la");
T("mañana.com", "xn--maana-pta.com");
T("mañana。com", "xn--maana-pta.com");
T("mañanacom", "xn--maana-pta.com");
T("mañana。com", "xn--maana-pta.com");
T("ü", "xn--tda");
T("", ".xn--tda");
T("ü.ü", "xn--tda.xn--tda");
T("ü.ü.", "xn--tda.xn--tda.");
T("üëäö♥", "xn--4can8av2009b");
T("Willst du die Blüthe des frühen, die Früchte des späteren Jahres",
"xn--Willst du die Blthe des frhen, "
"die Frchte des spteren Jahres-x9e96lkal");
T("ليهمابتكلموشعربي؟", "xn--egbpdaj6bu4bxfgehfvwxn");
T("他们为什么不说中文", "xn--ihqwcrb4cv8a8dqg056pqjye");
T("他們爲什麽不說中文", "xn--ihqwctvzc91f659drss3x8bo0yb");
T("Pročprostěnemluvíčesky", "xn--Proprostnemluvesky-uyb24dma41a");
T("למההםפשוטלאמדבריםעברית", "xn--4dbcagdahymbxekheh6e0a7fei0b");
T("यहलोगहिन्दीक्योंनहींबोलसकतेहैं",
"xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd");
T("なぜみんな日本語を話してくれないのか",
"xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa");
T("세계의모든사람들이한국어를이해한다면얼마나좋을까",
"xn--989aomsvi5e83db1d2a355cv1e0vak1d"
"wrv93d5xbh15a0dt30a5jpsd879ccm6fea98c");
T("почемужеонинеговорятпорусски", "xn--b1abfaaepdrnnbgefbadotcwatmq2g4l");
T("PorquénopuedensimplementehablarenEspañol",
"xn--PorqunopuedensimplementehablarenEspaol-fmd56a");
T("TạisaohọkhôngthểchỉnóitiếngViệt",
"xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g");
T("3年B組金八先生", "xn--3B-ww4c5e180e575a65lsy2b");
T("安室奈美恵-with-SUPER-MONKEYS",
"xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n");
T("Hello-Another-Way-それぞれの場所",
"xn--Hello-Another-Way--fc4qua05auwb3674vfr0b");
T("ひとつ屋根の下2", "xn--2-u9tlzr9756bt3uc0v");
T("MajiでKoiする5秒前", "xn--MajiKoi5-783gue6qz075azm5e");
T("パフィーdeルンバ", "xn--de-jg4avhby1noc0d");
T("そのスピードで", "xn--d9juau41awczczp");
T("-> $1.00 <-", "-> $1.00 <-");
/* Test cases from https://unicode.org/reports/tr46/ */
T("faß.de", "xn--fa-hia.de");
T("βόλος.com", "xn--nxasmm1c.com");
T("ශ්‍රී.com", "xn--10cl1a0b660p.com");
T("نامه‌ای.com", "xn--mgba3gch31f060k.com");
return 0;
}
#undef T
#endif /* __MVS__ */

View File

@ -442,6 +442,9 @@ TEST_DECLARE (fork_threadpool_queue_work_simple)
#endif
#endif
TEST_DECLARE (idna_toascii)
TEST_DECLARE (utf8_decode1)
TASK_LIST_START
TEST_ENTRY_CUSTOM (platform_output, 0, 1, 5000)
@ -946,6 +949,13 @@ TASK_LIST_START
#endif
#endif
TEST_ENTRY (utf8_decode1)
/* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
#ifndef __MVS__
TEST_ENTRY (idna_toascii)
#endif
#if 0
/* These are for testing the test runner. */
TEST_ENTRY (fail_always)

View File

@ -46,6 +46,7 @@
'test-homedir.c',
'test-hrtime.c',
'test-idle.c',
'test-idna.c',
'test-ip6-addr.c',
'test-ipc-heavy-traffic-deadlock-bug.c',
'test-ipc-send-recv.c',

2
uv.gyp
View File

@ -70,6 +70,8 @@
'include/uv/version.h',
'src/fs-poll.c',
'src/heap-inl.h',
'src/idna.c',
'src/idna.h',
'src/inet.c',
'src/queue.h',
'src/threadpool.c',