1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/renderer/android/email_detector.h"
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/strings/utf_string_conversions.h"
#include "content/public/renderer/android_content_detection_prefixes.h"
#include "net/base/escape.h"
#include "third_party/icu/source/i18n/unicode/regex.h"
namespace {
// Maximum length of an email address. This is the value reported by
// EmailDetector::GetMaximumContentLength().
const size_t kMaximumEmailLength = 254;
// Regex to match email addresses.
// This is more specific than RFC 2822 (uncommon special characters are
// disallowed) in order to avoid false positives.
// Delimiters are word boundaries to allow punctuation, quote marks etc. around
// the address.
// The character classes only list upper-case letters; EmailDetector compiles
// the pattern with UREGEX_CASE_INSENSITIVE, so lower-case input matches too.
// The final label (top-level domain) must be 2 to 6 letters long.
const char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b";
} // anonymous namespace
namespace content {
// Nothing to initialize here: the constructor body is intentionally empty.
EmailDetector::EmailDetector() {}
// Reports the maximum content length handled by this detector, which is the
// file-local kMaximumEmailLength constant.
size_t EmailDetector::GetMaximumContentLength() {
  const size_t max_length = kMaximumEmailLength;
  return max_length;
}
// Builds the intent URL for a detected email address.
//
// |content_text| is the matched address. When it is non-empty, it is escaped
// with net::EscapeQueryParamValue() and appended to kEmailPrefix; otherwise a
// default-constructed GURL is returned.
GURL EmailDetector::GetIntentURL(const std::string& content_text) {
  if (!content_text.empty()) {
    const std::string escaped_address =
        net::EscapeQueryParamValue(content_text, true);
    return GURL(kEmailPrefix + escaped_address);
  }

  return GURL();
}
// Searches the UTF-16 range [begin, end) for the first email address.
//
// On success returns true and fills:
//   |start_pos|    - offset of the first matched code unit, relative to |begin|.
//   |end_pos|      - offset just past the last matched code unit, relative to
//                    |begin|.
//   |content_text| - the matched address converted to UTF-8.
//
// Returns false, leaving the out-parameters untouched, when no address is
// found or when ICU reports an error while compiling or running the regex.
bool EmailDetector::FindContent(const string16::const_iterator& begin,
                                const string16::const_iterator& end,
                                size_t* start_pos,
                                size_t* end_pos,
                                std::string* content_text) {
  // Copy the range so ICU gets a contiguous buffer to wrap.
  string16 utf16_input = string16(begin, end);
  icu::UnicodeString pattern(kEmailRegex);
  icu::UnicodeString input(utf16_input.data(), utf16_input.length());

  UErrorCode status = U_ZERO_ERROR;
  // The matcher only lives for this call, so it does not need to be on the
  // heap. It is case-insensitive because kEmailRegex lists upper-case only.
  icu::RegexMatcher matcher(pattern, input, UREGEX_CASE_INSENSITIVE, status);
  DCHECK(U_SUCCESS(status));
  if (U_FAILURE(status))
    return false;  // Pattern failed to compile; there is nothing to search.

  if (!matcher.find())
    return false;

  // ICU calls taking a UErrorCode& do nothing once |status| holds a failure,
  // so a single check after all three queries is enough.
  const int32_t match_start = matcher.start(status);
  const int32_t match_end = matcher.end(status);
  icu::UnicodeString content_ustr(matcher.group(status));
  DCHECK(U_SUCCESS(status));
  if (U_FAILURE(status))
    return false;  // Never report a match with invalid positions.

  *start_pos = match_start;
  *end_pos = match_end;
  UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(), content_text);
  return true;
}
} // namespace content
|