summaryrefslogtreecommitdiff
path: root/chromium/base/i18n/string_search.cc
blob: 9f830d48924e73cfd30e7795aa43ca1385ad86e9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <stdint.h>

#include "base/i18n/string_search.h"

#include "third_party/icu/source/i18n/unicode/usearch.h"

namespace base {
namespace i18n {

FixedPatternStringSearch::FixedPatternStringSearch(const string16& find_this,
                                                   bool case_sensitive)
    : find_this_(find_this) {
  // usearch_open requires a valid string argument to be searched, even if we
  // want to set it by usearch_setText afterwards. So, supplying a dummy text.
  const string16& dummy = find_this_;

  UErrorCode status = U_ZERO_ERROR;
  search_ = usearch_open(find_this_.data(), find_this_.size(), dummy.data(),
                         dummy.size(), uloc_getDefault(),
                         nullptr,  // breakiter
                         &status);
  if (U_SUCCESS(status)) {
    // http://icu-project.org/apiref/icu4c40/ucol_8h.html#6a967f36248b0a1bc7654f538ee8ba96
    // Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary
    // differences. Set comparison level to UCOL_TERTIARY to include all
    // comparison differences.
    // Diacritical differences on the same base letter represent a
    // secondary difference.
    // Uppercase and lowercase versions of the same character represents a
    // tertiary difference.
    UCollator* collator = usearch_getCollator(search_);
    ucol_setStrength(collator, case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY);
    usearch_reset(search_);
  }
}

FixedPatternStringSearch::~FixedPatternStringSearch() {
  if (search_)
    usearch_close(search_);
}

bool FixedPatternStringSearch::Search(const string16& in_this,
                                      size_t* match_index,
                                      size_t* match_length,
                                      bool forward_search) {
  UErrorCode status = U_ZERO_ERROR;
  usearch_setText(search_, in_this.data(), in_this.size(), &status);

  // Default to basic substring search if usearch fails. According to
  // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
  // if either |find_this| or |in_this| are empty. In either case basic
  // substring search will give the correct return value.
  if (!U_SUCCESS(status)) {
    size_t index = in_this.find(find_this_);
    if (index == string16::npos)
      return false;
    if (match_index)
      *match_index = index;
    if (match_length)
      *match_length = find_this_.size();
    return true;
  }

  int32_t index = forward_search ? usearch_first(search_, &status)
                                 : usearch_last(search_, &status);
  if (!U_SUCCESS(status) || index == USEARCH_DONE)
    return false;
  if (match_index)
    *match_index = static_cast<size_t>(index);
  if (match_length)
    *match_length = static_cast<size_t>(usearch_getMatchedLength(search_));
  return true;
}

FixedPatternStringSearchIgnoringCaseAndAccents::
    FixedPatternStringSearchIgnoringCaseAndAccents(const string16& find_this)
    : base_search_(find_this, /*case_sensitive=*/false) {}

bool FixedPatternStringSearchIgnoringCaseAndAccents::Search(
    const string16& in_this,
    size_t* match_index,
    size_t* match_length) {
  return base_search_.Search(in_this, match_index, match_length,
                             /*forward_search=*/true);
}

bool StringSearchIgnoringCaseAndAccents(const string16& find_this,
                                        const string16& in_this,
                                        size_t* match_index,
                                        size_t* match_length) {
  return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search(
      in_this, match_index, match_length);
}

bool StringSearch(const string16& find_this,
                  const string16& in_this,
                  size_t* match_index,
                  size_t* match_length,
                  bool case_sensitive,
                  bool forward_search) {
  return FixedPatternStringSearch(find_this, case_sensitive)
      .Search(in_this, match_index, match_length, forward_search);
}

}  // namespace i18n
}  // namespace base