diff options
author | Jay Satiro <raysatiro@yahoo.com> | 2015-07-01 22:06:10 -0400 |
---|---|---|
committer | Jay Satiro <raysatiro@yahoo.com> | 2015-07-01 22:07:27 -0400 |
commit | f379bb37945295407acf3ba2c695d3703e4c9c51 (patch) | |
tree | 9427ba65fea613ff3dadcc1464bcd9d1acc7a9cf | |
parent | 741e17734dde7cbcf240ad86bd3eaad63f12ac60 (diff) | |
download | curl-f379bb37945295407acf3ba2c695d3703e4c9c51.tar.gz |
url: Don't pass UTF-8 hostname to libidn unless it's valid UTF-8. draft2
- Fix bug in utf8len: Return error when another character in the
sequence is expected but that character is null. That is what I had
initially intended, but I made a mistake with the logic.
- Convert locale to UTF-8 then check if UTF-8 is valid.
My understanding of the conversion from locale to UTF-8 is that if the
locale string, which is eventually passed from libidn to libiconv as
'from_codeset', is determined to be UTF-8, then (according to the reporter,
Thijs Alkemade) no conversion will happen, since the 'to_codeset' is
"UTF-8".
So, we can only check for valid UTF-8 after we're sure we have UTF-8.
The alternative here, I guess, would be to second-guess iconv, which could
lead to bugs, as Daniel noted on the wget mailing list, or to just call it
directly, as Alessandro has done.
-rw-r--r-- | lib/url.c | 13 |
1 files changed, 8 insertions, 5 deletions
@@ -3656,7 +3656,7 @@ static curl_off_t utf8len(const char *str) continue; if(*ch < 0xC2 || *ch > 0xF4) return error; - if(*++ch) + if(!*++ch) return error; /* second byte */ if(first == 0xE0) { @@ -3681,14 +3681,14 @@ static curl_off_t utf8len(const char *str) if(first <= 0xDF) continue; } - if(*++ch) + if(!*++ch) return error; /* third byte */ if(*ch < 0x80 || *ch > 0xBF) return error; if(first <= 0xEF) continue; - if(*++ch) + if(!*++ch) return error; /* fourth byte */ if(*ch < 0x80 || *ch > 0xBF) @@ -3775,12 +3775,15 @@ static void fix_hostname(struct SessionHandle *data, char *ace_hostname = NULL; int rc; /* Don't pass UTF-8 hostname to libidn unless it's valid UTF-8 */ - if(codepage_is_utf8() && utf8len(host->name) < 0){ + char *utf8 = stringprep_locale_to_utf8(host->name); + if(utf8len(utf8) < 0) { infof(data, "Hostname contains invalid UTF-8 sequence\n"); rc = IDNA_STRINGPREP_ERROR; } else - rc = idna_to_ascii_lz(host->name, &ace_hostname, 0); + rc = idna_to_ascii_8z(utf8, &ace_hostname, 0); + idn_free(utf8); + utf8 = NULL; infof (data, "Input domain encoded as `%s'\n", stringprep_locale_charset ()); if(rc != IDNA_SUCCESS) |