chromium/base/i18n/case_conversion_unittest.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/i18n/case_conversion.h"
#include "base/i18n/rtl.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/icu_test_util.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/icu/source/i18n/unicode/usearch.h"

namespace base {
namespace i18n {

namespace {

// Mixed-case, non-ASCII input shared by the tests below, written with hex
// escapes. Contents, in order and separated by spaces (U+0020):
//   - U+00C4 U+00D6 U+00E4 U+00F6 (upper- and lower-case Latin-1 letters),
//   - U+00CF U+00EF,
//   - a run of symbols/punctuation: U+00F7, '%', U+00A4, '#', '*', '^', '`',
//     '@', U+00A3, '$', U+2030, U+201A, '~',
//   - U+1F07 U+1F0F,
//   - U+1E00 U+1E01.
const wchar_t kNonASCIIMixed[] =
    L"\xC4\xD6\xE4\xF6\x20\xCF\xEF\x20\xF7\x25"
    L"\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07\x1F0F"
    L"\x20\x1E00\x1E01";
// Counterpart of kNonASCIIMixed in which each letter pair appears only in its
// lower-case form (U+00E4, U+00F6, U+00EF, U+1F07, U+1E01); the
// symbol/punctuation run is identical. The NonASCII test uses it as the
// expected ToLower() result.
const wchar_t kNonASCIILower[] =
    L"\xE4\xF6\xE4\xF6\x20\xEF\xEF"
    L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F07"
    L"\x1F07\x20\x1E01\x1E01";
// Counterpart of kNonASCIIMixed in which each letter pair appears only in its
// upper-case form (U+00C4, U+00D6, U+00CF, U+1F0F, U+1E00); the
// symbol/punctuation run is identical. The NonASCII test uses it as the
// expected ToUpper() result.
const wchar_t kNonASCIIUpper[] =
    L"\xC4\xD6\xC4\xD6\x20\xCF\xCF"
    L"\x20\xF7\x25\xA4\x23\x2A\x5E\x60\x40\xA3\x24\x2030\x201A\x7E\x20\x1F0F"
    L"\x1F0F\x20\x1E00\x1E00";

}  // namespace

// Checks that ToLower() and ToUpper() convert a mixed-case ASCII sentence to
// all lower-case and all upper-case respectively, leaving spaces and
// punctuation untouched.
TEST(CaseConversionTest, UpperLower) {
  const string16 input = ASCIIToUTF16("Text with UPPer & lowER casE.");

  EXPECT_EQ(ASCIIToUTF16("text with upper & lower case."), ToLower(input));
  EXPECT_EQ(ASCIIToUTF16("TEXT WITH UPPER & LOWER CASE."), ToUpper(input));
}

// Checks ToLower() and ToUpper() against the non-ASCII fixture strings:
// converting kNonASCIIMixed must yield kNonASCIILower and kNonASCIIUpper.
TEST(CaseConversionTest, NonASCII) {
  const string16 input = WideToUTF16(kNonASCIIMixed);

  EXPECT_EQ(WideToUTF16(kNonASCIILower), ToLower(input));
  EXPECT_EQ(WideToUTF16(kNonASCIIUpper), ToUpper(input));
}

// Checks that ToLower()/ToUpper() follow the ICU default locale for the
// Turkish dotted/dotless "i": the same input is converted once under "en_US"
// and once under "tr", with different lower-case expectations.
TEST(CaseConversionTest, TurkishLocaleConversion) {
  // U+0049 (LATIN CAPITAL LETTER I) followed by U+0131 (LATIN SMALL LETTER
  // DOTLESS I).
  const string16 input = WideToUTF16(L"\x49\x131");

  // Scoped helper; judging by its name it puts the ICU default locale back
  // when it goes out of scope, so the locale switches below do not leak into
  // other tests.
  test::ScopedRestoreICUDefaultLocale locale_restorer;

  // Under en_US, "I" lower-cases to "i" (U+0069) and the dotless i is left
  // alone; both characters upper-case to "I".
  i18n::SetICUDefaultLocale("en_US");
  EXPECT_EQ(WideToUTF16(L"\x69\x131"), ToLower(input));
  EXPECT_EQ(WideToUTF16(L"\x49\x49"), ToUpper(input));

  // Under tr, "I" lower-cases to the dotless i (U+0131); the upper-case
  // expectation is the same as for en_US.
  i18n::SetICUDefaultLocale("tr");
  EXPECT_EQ(WideToUTF16(L"\x131\x131"), ToLower(input));
  EXPECT_EQ(WideToUTF16(L"\x49\x49"), ToUpper(input));
}

// Checks FoldCase(): ASCII lower-casing, agreement across the non-ASCII
// fixtures, independence from the ICU default locale, and foldings that change
// the string length.
TEST(CaseConversionTest, FoldCase) {
  // Plain ASCII input is expected to fold to its lower-case form.
  const string16 ascii_input = ASCIIToUTF16("Hello, World");
  EXPECT_EQ(ASCIIToUTF16("hello, world"), FoldCase(ascii_input));

  // The mixed, lower-case and upper-case non-ASCII fixtures must all fold to
  // the same string.
  const string16 mixed_input = WideToUTF16(kNonASCIIMixed);
  const string16 lower_input = WideToUTF16(kNonASCIILower);
  EXPECT_EQ(FoldCase(mixed_input), FoldCase(lower_input));
  const string16 upper_input = WideToUTF16(kNonASCIIUpper);
  EXPECT_EQ(FoldCase(mixed_input), FoldCase(upper_input));

  // Turkish "I" / dotless "i" input. The expected folding equals the en_US
  // lower-case result and must not change when the default locale is Turkish.
  const string16 turkish_input = WideToUTF16(L"\x49\x131");
  const string16 turkish_folded = WideToUTF16(L"\x69\x131");

  test::ScopedRestoreICUDefaultLocale locale_restorer;

  i18n::SetICUDefaultLocale("en_US");
  EXPECT_EQ(turkish_folded, FoldCase(turkish_input));

  i18n::SetICUDefaultLocale("tr");
  EXPECT_EQ(turkish_folded, FoldCase(turkish_input));

  // Folding can make the string longer: U+0130 (LATIN CAPITAL LETTER I WITH
  // DOT ABOVE) becomes a lower-case "i" followed by U+0307 (COMBINING DOT
  // ABOVE).
  const string16 dotted_capital_i = WideToUTF16(L"\u0130j");
  EXPECT_EQ(WideToUTF16(L"i\u0307j"), FoldCase(dotted_capital_i));

  // U+00DF (SHARP S) and U+1E9E (CAPITAL SHARP S) each fold to "ss".
  const string16 sharp_s_pair = WideToUTF16(L"\u00DF\u1E9E");
  EXPECT_EQ(ASCIIToUTF16("ssss"), FoldCase(sharp_s_pair));
}

}  // namespace i18n
}  // namespace base