summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJay Satiro <raysatiro@yahoo.com>2015-07-01 22:06:10 -0400
committerJay Satiro <raysatiro@yahoo.com>2015-07-01 22:07:27 -0400
commitf379bb37945295407acf3ba2c695d3703e4c9c51 (patch)
tree9427ba65fea613ff3dadcc1464bcd9d1acc7a9cf
parent741e17734dde7cbcf240ad86bd3eaad63f12ac60 (diff)
downloadcurl-f379bb37945295407acf3ba2c695d3703e4c9c51.tar.gz
url: Don't pass UTF-8 hostname to libidn unless it's valid UTF-8. draft2
- Fix bug in utf8len: Return error when another character in the sequence is expected but that character is null. That is what I had initially intended, but I made a mistake with the logic. - Convert locale to UTF-8 then check if UTF-8 is valid. My understanding of the conversion from locale to UTF-8 is that if the locale string which is eventually passed from libidn to libiconv as 'from_codeset' is determined to be UTF-8 then (according to the reporter Thijs Alkemade) no conversion will happen since the 'to_codeset' is "UTF-8". So, we can only check for valid UTF-8 after we're sure we have UTF-8. The alternative here I guess would be to second-guess iconv which could lead to bugs as Daniel noted on the wget mailing list, or just call it directly like Alessandro has done.
-rw-r--r--lib/url.c13
1 files changed, 8 insertions, 5 deletions
diff --git a/lib/url.c b/lib/url.c
index 0facadcbf..880297b51 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -3656,7 +3656,7 @@ static curl_off_t utf8len(const char *str)
continue;
if(*ch < 0xC2 || *ch > 0xF4)
return error;
- if(*++ch)
+ if(!*++ch)
return error;
/* second byte */
if(first == 0xE0) {
@@ -3681,14 +3681,14 @@ static curl_off_t utf8len(const char *str)
if(first <= 0xDF)
continue;
}
- if(*++ch)
+ if(!*++ch)
return error;
/* third byte */
if(*ch < 0x80 || *ch > 0xBF)
return error;
if(first <= 0xEF)
continue;
- if(*++ch)
+ if(!*++ch)
return error;
/* fourth byte */
if(*ch < 0x80 || *ch > 0xBF)
@@ -3775,12 +3775,15 @@ static void fix_hostname(struct SessionHandle *data,
char *ace_hostname = NULL;
int rc;
/* Don't pass UTF-8 hostname to libidn unless it's valid UTF-8 */
- if(codepage_is_utf8() && utf8len(host->name) < 0){
+ char *utf8 = stringprep_locale_to_utf8(host->name);
+ if(utf8len(utf8) < 0) {
infof(data, "Hostname contains invalid UTF-8 sequence\n");
rc = IDNA_STRINGPREP_ERROR;
}
else
- rc = idna_to_ascii_lz(host->name, &ace_hostname, 0);
+ rc = idna_to_ascii_8z(utf8, &ace_hostname, 0);
+ idn_free(utf8);
+ utf8 = NULL;
infof (data, "Input domain encoded as `%s'\n",
stringprep_locale_charset ());
if(rc != IDNA_SUCCESS)