summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Michael Brüning <michael.bruning@qt.io> 2020-09-30 15:46:53 +0200
committer: Michael Brüning <michael.bruning@qt.io> 2020-10-05 12:03:05 +0000
commit: a4599b6197515dbf0226592be68e53d473edf0ca (patch)
tree: 032c749eba2a170667ca997c94ff67633d79667d
parent: b769634b87a207300f9e2f9f6c3757c9bd31a9dd (diff)
download: qtwebengine-chromium-a4599b6197515dbf0226592be68e53d473edf0ca.tar.gz
[Backport] CVE-2020-6571: Incorrect security UI in Omnibox
Manual backport of patch originally reviewed on https://chromium-review.googlesource.com/c/chromium/src/+/2212807: [IDN Spoof Checks] Add U+0A5C to digit lookalikes. Also backports the digit lookalike checks. This CL adds U+0A5C as a lookalike character to the digit 3. This change appears to impact no domains seen in UKM. Fixed: 1085315 Change-Id: Iea9930363d853f154e2d781646a1b0b5da7fbbfd Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r-- chromium/components/url_formatter/idn_spoof_checker.cc 30
-rw-r--r-- chromium/components/url_formatter/idn_spoof_checker.h 6
2 files changed, 36 insertions, 0 deletions
diff --git a/chromium/components/url_formatter/idn_spoof_checker.cc b/chromium/components/url_formatter/idn_spoof_checker.cc
index af6b3be4272..83f54225a58 100644
--- a/chromium/components/url_formatter/idn_spoof_checker.cc
+++ b/chromium/components/url_formatter/idn_spoof_checker.cc
@@ -207,6 +207,15 @@ IDNSpoofChecker::IDNSpoofChecker() {
icu::UnicodeSet(UNICODE_STRING_SIMPLE("[[:Cyrl:]]"), status);
cyrillic_letters_.freeze();
+ // These characters are, or look like, digits. A domain label entirely made of
+ // digit-lookalikes or digits is blocked.
+ digits_ = icu::UnicodeSet(UNICODE_STRING_SIMPLE("[0-9]"), status);
+ digits_.freeze();
+ digit_lookalikes_ = icu::UnicodeSet(
+ icu::UnicodeString::fromUTF8("[θ२২੨੨૨೩೭շзҙӡउওਤ੩૩౩ဒვპੜ੫丩ㄐճ৪੪୫૭୨౨]"),
+ status);
+ digit_lookalikes_.freeze();
+
DCHECK(U_SUCCESS(status));
// This set is used to determine whether or not to apply a slow
// transliteration to remove diacritics to a given hostname before the
@@ -379,6 +388,10 @@ bool IDNSpoofChecker::SafeToDisplayAsUnicode(
!IsMadeOfLatinAlikeCyrillic(label_string);
}
+ // Disallow labels made up entirely of digits and digit lookalikes that
+ // contain at least one lookalike; labels of only real digits pass this check.
+ if (IsDigitLookalike(label_string))
+ return false;
+
// Additional checks for |label| with multiple scripts, one of which is Latin.
// Disallow non-ASCII Latin letters to mix with a non-Latin script.
// Note that the non-ASCII Latin check should not be applied when the entire
@@ -576,6 +589,23 @@ void IDNSpoofChecker::SetAllowedUnicodeSet(UErrorCode* status) {
uspoof_setAllowedUnicodeSet(checker_, &allowed_set, status);
}
+// Reports whether |label| could be mistaken for a number: every code point
+// must be either a member of |digits_| or of |digit_lookalikes_|, and at
+// least one of them must come from |digit_lookalikes_|. An empty label, or a
+// label consisting solely of members of |digits_|, yields false.
+bool IDNSpoofChecker::IsDigitLookalike(const icu::UnicodeString& label) {
+  bool saw_lookalike = false;
+  icu::StringCharacterIterator cursor(label);
+  cursor.setToStart();
+  while (cursor.hasNext()) {
+    const UChar32 code_point = cursor.next32PostInc();
+    // Members of |digits_| are always acceptable and never set the flag.
+    if (!digits_.contains(code_point)) {
+      // Anything that is neither a digit nor a lookalike means the label
+      // cannot be read as a number, so stop at the first such code point.
+      if (!digit_lookalikes_.contains(code_point)) {
+        return false;
+      }
+      saw_lookalike = true;
+    }
+  }
+  return saw_lookalike;
+}
+
bool IDNSpoofChecker::IsCyrillicTopLevelDomain(
base::StringPiece tld,
base::StringPiece16 tld_unicode) const {
diff --git a/chromium/components/url_formatter/idn_spoof_checker.h b/chromium/components/url_formatter/idn_spoof_checker.h
index edc5ae0083a..cbcc7d8d821 100644
--- a/chromium/components/url_formatter/idn_spoof_checker.h
+++ b/chromium/components/url_formatter/idn_spoof_checker.h
@@ -79,6 +79,10 @@ class IDNSpoofChecker {
bool IsCyrillicTopLevelDomain(base::StringPiece tld,
base::StringPiece16 tld_unicode) const;
+ // Returns true if the string is entirely made up of either digits or
+ // characters that look like digits (but not exclusively actual digits).
+ bool IsDigitLookalike(const icu::UnicodeString& label);
+
// Used for unit tests.
static void SetTrieParamsForTesting(const HuffmanTrieParams& trie_params);
static void RestoreTrieParamsForTesting();
@@ -90,6 +94,8 @@ class IDNSpoofChecker {
icu::UnicodeSet combining_diacritics_exceptions_;
icu::UnicodeSet cyrillic_letters_;
icu::UnicodeSet cyrillic_letters_latin_alike_;
+ icu::UnicodeSet digits_;
+ icu::UnicodeSet digit_lookalikes_;
icu::UnicodeSet lgc_letters_n_ascii_;
icu::UnicodeSet icelandic_characters_;
std::unique_ptr<icu::Transliterator> diacritic_remover_;