diff options
author | Daniel Hardman <daniel.hardman@gmail.com> | 2015-07-08 03:04:46 -0400 |
---|---|---|
committer | Jay Satiro <raysatiro@yahoo.com> | 2015-07-08 03:13:31 -0400 |
commit | 288f6917bbd3decf94ad6a725e7b90def965c9c3 (patch) | |
tree | 7381f83eff22fc2539b7517da6a02d3bac8a3d12 | |
parent | bd2ecf6a552ccffe8797b81f18522338fa7e1a71 (diff) | |
download | curl-288f6917bbd3decf94ad6a725e7b90def965c9c3.tar.gz |
url: Don't pass UTF-8 hostname to libidn unless it's valid UTF-8. draft5
- Add a unit test.
There is some question about whether we should be returning a better
error (currently, the validation error, IDNA_STRINGPREP_ERROR, gets lost
because the only action we take on invalid input is to not convert
non-ascii hostnames, which eventually causes a dns failure; that's what
gets reported). The unit test for the fix thus tests dns failure instead
of looking for a specific error from utf8 validation.
-rw-r--r-- | lib/non-ascii.c | 2 | ||||
-rw-r--r-- | lib/url.c | 9 | ||||
-rw-r--r-- | tests/data/Makefile.inc | 2 | ||||
-rw-r--r-- | tests/data/test1603 | 26 | ||||
-rw-r--r-- | tests/unit/Makefile.inc | 4 | ||||
-rw-r--r-- | tests/unit/unit1603.c | 108 |
6 files changed, 147 insertions, 4 deletions
diff --git a/lib/non-ascii.c b/lib/non-ascii.c index adb41de88..97f30ccc4 100644 --- a/lib/non-ascii.c +++ b/lib/non-ascii.c @@ -347,7 +347,7 @@ This function also tests for valid UTF-8 in accordance with the Unicode Standard, Section Conformance 3.9, Table 3-7, Well-Formed UTF-8 Byte Sequences. http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G7404 -Success: Returns the number of UTF-8 characters. +Success: Returns the number of unicode codepoints encoded in a UTF-8 string. Failure: Returns -1 if 'str' is NULL or points to invalid UTF-8. */ curl_off_t utf8len(const char *str) @@ -3671,7 +3671,7 @@ static bool tld_check_name(struct SessionHandle *data, return TRUE; } -#endif +#endif /* USE_LIBIDN */ /* * Perform any necessary IDN conversion of hostname @@ -3705,8 +3705,13 @@ static void fix_hostname(struct SessionHandle *data, if(stringprep_check_version(LIBIDN_REQUIRED_VERSION)) { char *ace_hostname = NULL; int rc; - /* Don't pass UTF-8 hostname to libidn unless it's valid UTF-8 */ char *utf8 = stringprep_locale_to_utf8(host->name); + /* Don't pass UTF-8 hostname to libidn unless it's valid UTF-8. This check + * is quite picky; in addition to 5- and 6-byte sequences and invalid lead + * and continuation bytes, CESU-8 and so-called 'Modified UTF-8' sequences + * are also disallowed. This is a security measure; unsanitized UTF-8 + * could be used to encode embedded null bytes and other undesirable stuff. + */ if(utf8len(utf8) < 0) { infof(data, "Hostname contains invalid UTF-8 sequence\n"); rc = IDNA_STRINGPREP_ERROR; diff --git a/tests/data/Makefile.inc b/tests/data/Makefile.inc index 51823fe88..05602f8fa 100644 --- a/tests/data/Makefile.inc +++ b/tests/data/Makefile.inc @@ -155,7 +155,7 @@ test1520 \ \ test1525 test1526 test1527 test1528 test1529 test1530 test1531 \ \ -test1600 test1601 test1602 \ +test1600 test1601 test1602 test1603 \ \ test1800 test1801 \ \ diff --git a/tests/data/test1603 b/tests/data/test1603 new file mode 100644 index 000000000..7a78ffe4c --- /dev/null +++ b/tests/data/test1603 @@ -0,0 +1,26 @@ +<testcase> +<info> +<keywords> +unittest +idn +</keywords> +</info> + +# +# Client-side +<client> +<server> +none +</server> +<features> +unittest +</features> + <name> +Internal sanitization of idn input testing + </name> +<tool> +unit1603 +</tool> +</client> + +</testcase> diff --git a/tests/unit/Makefile.inc b/tests/unit/Makefile.inc index 9073b34e6..e1c1821e1 100644 --- a/tests/unit/Makefile.inc +++ b/tests/unit/Makefile.inc @@ -8,6 +8,7 @@ UNITFILES = curlcheck.h \ UNITPROGS = unit1300 unit1301 unit1302 unit1303 unit1304 unit1305 unit1307 \ unit1308 unit1309 unit1330 unit1394 unit1395 unit1396 unit1397 unit1398 \ unit1600 unit1601 unit1602 + unit1600 unit1601 unit1602 unit1603 unit1300_SOURCES = unit1300.c $(UNITFILES) unit1300_CPPFLAGS = $(AM_CPPFLAGS) @@ -66,3 +67,6 @@ unit1601_CPPFLAGS = $(AM_CPPFLAGS) unit1602_SOURCES = unit1602.c $(UNITFILES) unit1602_CPPFLAGS = $(AM_CPPFLAGS) +unit1603_SOURCES = unit1603.c $(UNITFILES) +unit1603_CPPFLAGS = $(AM_CPPFLAGS) + diff --git a/tests/unit/unit1603.c b/tests/unit/unit1603.c new file mode 100644 index 000000000..0f835df75 --- /dev/null +++ b/tests/unit/unit1603.c @@ -0,0 +1,108 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2015, Daniel Stenberg, <daniel@haxx.se>, et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ +#include "curlcheck.h" + +#include "urldata.h" +#include "non-ascii.h" + +CURL *easy; + +static CURLcode unit_setup(void) +{ + easy = curl_easy_init(); + return CURLE_OK; +} + +static void unit_stop(void) +{ + curl_easy_cleanup(easy); +} + + +UNITTEST_START + +#ifdef USE_LIBIDN + fail_unless( utf8len(NULL) == -1, "null string should be an error" ); + + fail_unless( utf8len("") == 0, "empty string should get utf8len == 0" ); + + fail_unless( utf8len("\r\n") == 2, "ordinary ascii should get utf8len ==" + " strlen, even if it contains control chars"); + + /* Mixture of normal and double-byte sequences as used in latin langs. */ + fail_unless( utf8len("\xC2\xA9 2001, Chang\xC3\xA9 Corp.") == 20, + "utf8len should handle valid latin 1"); + + /* Japanese, Russian, Greek 2- and 3-byte sequences -- with a little ASCII */ + fail_unless( utf8len("\xE5\xA4\x89\xE3\x82\x8F\xD1\x81\xD0\xB2 ascii " + "\xD1\x8F\xD0\xB7\xCF\x8E\xCF\x81\xCE\xB1") == 16, + "utf8len should support a mix of several interesting languages"); + + /* overlong encoding of the Euro sign */ + fail_unless( utf8len("\xF0\x82\x82\xAC") == -1, + "utf8len should reject overlong encodings"); + + /* overlong encoding of embedded null */ + fail_unless( utf8len("with embedded null \xC0\x80 <<there") == -1, + "utf8len must disallow embedded null with overlong encoding, which is" + " known as 'modified utf8' in some circles but which is dangerous when" + " passed to libidn"); + + /* surrogate pair */ + fail_unless(utf8len("\xED\xA0\x81\xED\xB0\x80") == -1, + "utf8len must disallow CESU-8-style surrogate pairs (see" + " http://j.mp/1HzJPBY)"); + + /* invalid trail bytes, per table 3.7 in the Unicode Standard v7, Section + Conformance 3.9, Table 3-7, Well-Formed UTF-8 Byte Sequences. + + http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G7404 + + These cases catch the same issues as the ones tested above, but the table + doesn't make it obvious which sort of malformation it's preventing. It + seems prudent to prove that the table and our algorithm and our named + scenarios all have the same scope... + */ + fail_unless(utf8len("\xE0\x9F\xB1") == -1, "bad 2nd byte"); + fail_unless(utf8len("\xED\xA0\xB1") == -1, "bad 2nd byte"); + fail_unless(utf8len("\xF0\x85\xB1\xB1") == -1, "bad 2nd byte"); + fail_unless(utf8len("\xF4\x90\xB1\xB1") == -1, "bad 2nd byte"); + + /* Up to this point, we've just proved that our validation logic is + * accurate. Now we need to prove that it actually gets invoked when we + * use libidn. We do this by setting up a connection to a url with a + * hostname that contains invalid utf8, and verifying that we get back + * an error that shows that our validation logic rejected it. + */ + struct SessionHandle * curl; + CURLcode ret = Curl_open(&curl); + fail_unless(ret == 0, "serious error"); + ret = curl_easy_setopt(curl, CURLOPT_URL, "http://x\xC0\x80.com/"); + fail_unless(ret == 0, "shouldn't complain yet"); + ret = curl_easy_perform(curl); + fail_unless(ret != 0, "should get error about invalid hostname"); + /*printf("%s\n", curl_easy_strerror(ret));*/ +#else + /*printf("test skipped; libidn not active\n");*/ +#endif + +UNITTEST_STOP |