summaryrefslogtreecommitdiff
path: root/chromium/chrome/renderer/safe_browsing
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@qt.io>2020-10-12 14:27:29 +0200
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2020-10-13 09:35:20 +0000
commitc30a6232df03e1efbd9f3b226777b07e087a1122 (patch)
treee992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/chrome/renderer/safe_browsing
parent7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff)
downloadqtwebengine-chromium-85-based.tar.gz
BASELINE: Update Chromium to 85.0.4183.14085-based
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/chrome/renderer/safe_browsing')
-rw-r--r--chromium/chrome/renderer/safe_browsing/DEPS3
-rw-r--r--chromium/chrome/renderer/safe_browsing/feature_extractor_clock.cc15
-rw-r--r--chromium/chrome/renderer/safe_browsing/feature_extractor_clock.h30
-rw-r--r--chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.cc14
-rw-r--r--chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.h29
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_classifier.cc131
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_classifier.h27
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc33
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc90
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h27
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc21
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc66
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h17
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc48
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc45
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h14
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc40
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc5
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc34
-rw-r--r--chromium/chrome/renderer/safe_browsing/scorer.cc34
-rw-r--r--chromium/chrome/renderer/safe_browsing/scorer.h6
-rw-r--r--chromium/chrome/renderer/safe_browsing/scorer_unittest.cc74
22 files changed, 414 insertions, 389 deletions
diff --git a/chromium/chrome/renderer/safe_browsing/DEPS b/chromium/chrome/renderer/safe_browsing/DEPS
index 76dae5d8e5a..6b8537549c7 100644
--- a/chromium/chrome/renderer/safe_browsing/DEPS
+++ b/chromium/chrome/renderer/safe_browsing/DEPS
@@ -1,9 +1,12 @@
include_rules = [
+ "+components/paint_preview/common",
"+components/safe_browsing/content/renderer",
+ "+components/safe_browsing/content/password_protection/visual_utils.h",
"+components/safe_browsing/core/common",
"+components/safe_browsing/core/proto/csd.pb.h",
"+components/safe_browsing/core/proto/client_model.pb.h",
"+components/safe_browsing/core/features.h",
+ "+cc/paint",
"+third_party/smhasher",
]
diff --git a/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.cc b/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.cc
deleted file mode 100644
index 45fbd4d4437..00000000000
--- a/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.cc
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
-
-namespace safe_browsing {
-
-FeatureExtractorClock::~FeatureExtractorClock() {}
-
-base::TimeTicks FeatureExtractorClock::Now() {
- return base::TimeTicks::Now();
-}
-
-} // namespace safe_browsing
diff --git a/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.h b/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.h
deleted file mode 100644
index 930c1a4b7ab..00000000000
--- a/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// A simple abstraction for getting the current time during feature extraction.
-// This can be mocked out for testing.
-
-#ifndef CHROME_RENDERER_SAFE_BROWSING_FEATURE_EXTRACTOR_CLOCK_H_
-#define CHROME_RENDERER_SAFE_BROWSING_FEATURE_EXTRACTOR_CLOCK_H_
-
-#include "base/macros.h"
-#include "base/time/time.h"
-
-namespace safe_browsing {
-
-class FeatureExtractorClock {
- public:
- FeatureExtractorClock() {}
- virtual ~FeatureExtractorClock();
-
- // Returns the current time. May be mocked for testing.
- virtual base::TimeTicks Now();
-
- private:
- DISALLOW_COPY_AND_ASSIGN(FeatureExtractorClock);
-};
-
-} // namespace safe_browsing
-
-#endif // CHROME_RENDERER_SAFE_BROWSING_FEATURE_EXTRACTOR_CLOCK_H_
diff --git a/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.cc b/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.cc
deleted file mode 100644
index 665baf0b5c3..00000000000
--- a/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.cc
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
-
-namespace safe_browsing {
-
-MockFeatureExtractorClock::MockFeatureExtractorClock()
- : FeatureExtractorClock() {}
-
-MockFeatureExtractorClock::~MockFeatureExtractorClock() {}
-
-} // namespace safe_browsing
diff --git a/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.h b/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.h
deleted file mode 100644
index 7012ee81090..00000000000
--- a/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// A mock implementation of FeatureExtractorClock for testing.
-
-#ifndef CHROME_RENDERER_SAFE_BROWSING_MOCK_FEATURE_EXTRACTOR_CLOCK_H_
-#define CHROME_RENDERER_SAFE_BROWSING_MOCK_FEATURE_EXTRACTOR_CLOCK_H_
-
-#include "base/macros.h"
-#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
-#include "testing/gmock/include/gmock/gmock.h"
-
-namespace safe_browsing {
-
-class MockFeatureExtractorClock : public FeatureExtractorClock {
- public:
- MockFeatureExtractorClock();
- ~MockFeatureExtractorClock() override;
-
- MOCK_METHOD0(Now, base::TimeTicks());
-
- private:
- DISALLOW_COPY_AND_ASSIGN(MockFeatureExtractorClock);
-};
-
-} // namespace safe_browsing
-
-#endif // CHROME_RENDERER_SAFE_BROWSING_MOCK_FEATURE_EXTRACTOR_CLOCK_H_
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier.cc b/chromium/chrome/renderer/safe_browsing/phishing_classifier.cc
index 0842c7ea50a..c53c18d31b5 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_classifier.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier.cc
@@ -11,18 +11,18 @@
#include "base/callback.h"
#include "base/compiler_specific.h"
#include "base/location.h"
-#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/single_thread_task_runner.h"
#include "base/strings/string_util.h"
#include "base/threading/thread_task_runner_handle.h"
+#include "cc/paint/skia_paint_canvas.h"
#include "chrome/common/url_constants.h"
-#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
#include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h"
#include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
#include "chrome/renderer/safe_browsing/scorer.h"
+#include "components/paint_preview/common/paint_preview_tracker.h"
#include "components/safe_browsing/core/proto/csd.pb.h"
#include "content/public/renderer/render_frame.h"
#include "crypto/sha2.h"
@@ -32,6 +32,7 @@
#include "third_party/blink/public/web/web_document_loader.h"
#include "third_party/blink/public/web/web_local_frame.h"
#include "third_party/blink/public/web/web_view.h"
+#include "ui/gfx/geometry/rect_conversions.h"
#include "url/gurl.h"
namespace safe_browsing {
@@ -39,32 +40,29 @@ namespace safe_browsing {
const float PhishingClassifier::kInvalidScore = -1.0;
const float PhishingClassifier::kPhishyThreshold = 0.5;
-PhishingClassifier::PhishingClassifier(content::RenderFrame* render_frame,
- FeatureExtractorClock* clock)
- : render_frame_(render_frame), scorer_(nullptr), clock_(clock) {
+PhishingClassifier::PhishingClassifier(content::RenderFrame* render_frame)
+ : render_frame_(render_frame), scorer_(nullptr) {
Clear();
}
PhishingClassifier::~PhishingClassifier() {
// The RenderView should have called CancelPendingClassification() before
// we are destroyed.
- CheckNoPendingClassification();
+ DCHECK(done_callback_.is_null());
+ DCHECK(!page_text_);
}
void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) {
- CheckNoPendingClassification();
+ DCHECK(done_callback_.is_null());
+ DCHECK(!page_text_);
scorer_ = scorer;
if (scorer_) {
- url_extractor_.reset(new PhishingUrlFeatureExtractor);
- dom_extractor_.reset(new PhishingDOMFeatureExtractor(clock_.get()));
- term_extractor_.reset(new PhishingTermFeatureExtractor(
- &scorer_->page_terms(),
- &scorer_->page_words(),
- scorer_->max_words_per_term(),
- scorer_->murmurhash3_seed(),
- scorer_->max_shingles_per_page(),
- scorer_->shingle_size(),
- clock_.get()));
+ url_extractor_ = std::make_unique<PhishingUrlFeatureExtractor>();
+ dom_extractor_ = std::make_unique<PhishingDOMFeatureExtractor>();
+ term_extractor_ = std::make_unique<PhishingTermFeatureExtractor>(
+ &scorer_->page_terms(), &scorer_->page_words(),
+ scorer_->max_words_per_term(), scorer_->murmurhash3_seed(),
+ scorer_->max_shingles_per_page(), scorer_->shingle_size());
} else {
// We're disabling client-side phishing detection, so tear down all
// of the relevant objects.
@@ -75,7 +73,7 @@ void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) {
}
bool PhishingClassifier::is_ready() const {
- return scorer_ != NULL;
+ return !!scorer_;
}
void PhishingClassifier::BeginClassification(const base::string16* page_text,
@@ -84,7 +82,8 @@ void PhishingClassifier::BeginClassification(const base::string16* page_text,
// The RenderView should have called CancelPendingClassification() before
// starting a new classification, so DCHECK this.
- CheckNoPendingClassification();
+ DCHECK(done_callback_.is_null());
+ DCHECK(!page_text_);
// However, in an opt build, we will go ahead and clean up the pending
// classification so that we can start in a known state.
CancelPendingClassification();
@@ -158,41 +157,70 @@ void PhishingClassifier::DOMExtractionFinished(bool success) {
void PhishingClassifier::TermExtractionFinished(bool success) {
if (success) {
- blink::WebLocalFrame* main_frame = render_frame_->GetWebFrame();
-
- // Hash all of the features so that they match the model, then compute
- // the score.
- FeatureMap hashed_features;
- ClientPhishingRequest verdict;
- verdict.set_model_version(scorer_->model_version());
- verdict.set_url(main_frame->GetDocument().Url().GetString().Utf8());
- for (const auto& it : features_->features()) {
- bool result = hashed_features.AddRealFeature(
- crypto::SHA256HashString(it.first), it.second);
- DCHECK(result);
- ClientPhishingRequest::Feature* feature = verdict.add_feature_map();
- feature->set_name(it.first);
- feature->set_value(it.second);
- }
- for (const auto& it : *shingle_hashes_) {
- verdict.add_shingle_hashes(it);
- }
- float score = static_cast<float>(scorer_->ComputeScore(hashed_features));
- verdict.set_client_score(score);
- verdict.set_is_phishing(score >= scorer_->threshold_probability());
- RunCallback(verdict);
+ ExtractVisualFeatures();
} else {
RunFailureCallback();
}
}
-void PhishingClassifier::CheckNoPendingClassification() {
- DCHECK(done_callback_.is_null());
- DCHECK(!page_text_);
- if (!done_callback_.is_null() || page_text_) {
- LOG(ERROR) << "Classification in progress, missing call to "
- << "CancelPendingClassification";
+void PhishingClassifier::ExtractVisualFeatures() {
+ blink::WebLocalFrame* frame = render_frame_->GetWebFrame();
+ gfx::SizeF viewport_size = frame->View()->VisualViewportSize();
+ gfx::Rect bounds = ToEnclosingRect(gfx::RectF(viewport_size));
+ bitmap_ = std::make_unique<SkBitmap>();
+ // Use the Rec. 2020 color space, in case the user input is wide-gamut.
+ sk_sp<SkColorSpace> rec2020 = SkColorSpace::MakeRGB(
+ {2.22222f, 0.909672f, 0.0903276f, 0.222222f, 0.0812429f, 0, 0},
+ SkNamedGamut::kRec2020);
+ SkImageInfo bitmap_info = SkImageInfo::Make(
+ bounds.width(), bounds.height(), SkColorType::kRGBA_8888_SkColorType,
+ SkAlphaType::kUnpremul_SkAlphaType, rec2020);
+ if (!bitmap_->tryAllocPixels(bitmap_info))
+ return VisualExtractionFinished(/*success=*/false);
+ SkCanvas sk_canvas(*bitmap_);
+ cc::SkiaPaintCanvas cc_canvas(&sk_canvas);
+ auto tracker = std::make_unique<paint_preview::PaintPreviewTracker>(
+ base::UnguessableToken::Create(), frame->GetEmbeddingToken(),
+ /*is_main_frame=*/true);
+ cc_canvas.SetPaintPreviewTracker(tracker.get());
+ VisualExtractionFinished(frame->CapturePaintPreview(
+ bounds, &cc_canvas, /*include_linked_destinations=*/false));
+}
+
+void PhishingClassifier::VisualExtractionFinished(bool success) {
+ if (!success) {
+ RunFailureCallback();
+ return;
+ }
+
+ blink::WebLocalFrame* main_frame = render_frame_->GetWebFrame();
+
+ // Hash all of the features so that they match the model, then compute
+ // the score.
+ FeatureMap hashed_features;
+ ClientPhishingRequest verdict;
+ verdict.set_model_version(scorer_->model_version());
+ verdict.set_url(main_frame->GetDocument().Url().GetString().Utf8());
+ for (const auto& it : features_->features()) {
+ bool result = hashed_features.AddRealFeature(
+ crypto::SHA256HashString(it.first), it.second);
+ DCHECK(result);
+ ClientPhishingRequest::Feature* feature = verdict.add_feature_map();
+ feature->set_name(it.first);
+ feature->set_value(it.second);
}
+ for (const auto& it : *shingle_hashes_) {
+ verdict.add_shingle_hashes(it);
+ }
+ float score = static_cast<float>(scorer_->ComputeScore(hashed_features));
+ verdict.set_client_score(score);
+ verdict.set_is_phishing(score >= scorer_->threshold_probability());
+
+ if (scorer_->GetMatchingVisualTargets(*bitmap_, &verdict)) {
+ verdict.set_is_phishing(true);
+ }
+
+ RunCallback(verdict);
}
void PhishingClassifier::RunCallback(const ClientPhishingRequest& verdict) {
@@ -211,10 +239,11 @@ void PhishingClassifier::RunFailureCallback() {
}
void PhishingClassifier::Clear() {
- page_text_ = NULL;
+ page_text_ = nullptr;
done_callback_.Reset();
- features_.reset(NULL);
- shingle_hashes_.reset(NULL);
+ features_.reset(nullptr);
+ shingle_hashes_.reset(nullptr);
+ bitmap_.reset(nullptr);
}
} // namespace safe_browsing
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier.h b/chromium/chrome/renderer/safe_browsing/phishing_classifier.h
index 9528c434cce..401ae402dba 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_classifier.h
+++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier.h
@@ -27,6 +27,7 @@
#include "base/macros.h"
#include "base/memory/weak_ptr.h"
#include "base/strings/string16.h"
+#include "third_party/skia/include/core/SkBitmap.h"
namespace content {
class RenderFrame;
@@ -34,7 +35,6 @@ class RenderFrame;
namespace safe_browsing {
class ClientPhishingRequest;
-class FeatureExtractorClock;
class FeatureMap;
class PhishingDOMFeatureExtractor;
class PhishingTermFeatureExtractor;
@@ -55,11 +55,9 @@ class PhishingClassifier {
static const float kInvalidScore;
// Creates a new PhishingClassifier object that will operate on
- // |render_view|. |clock| is used to time feature extractor operations, and
- // the PhishingClassifier takes ownership of this object. Note that the
- // classifier will not be 'ready' until set_phishing_scorer() is called.
- PhishingClassifier(content::RenderFrame* render_frame,
- FeatureExtractorClock* clock);
+ // |render_view|. Note that the classifier will not be 'ready' until
+ // set_phishing_scorer() is called.
+ explicit PhishingClassifier(content::RenderFrame* render_frame);
virtual ~PhishingClassifier();
// Sets a scorer for the classifier to use in computing the phishiness score.
@@ -110,15 +108,18 @@ class PhishingClassifier {
void DOMExtractionFinished(bool success);
// Callback to be run when term feature extraction is complete.
+ // If it was successful, begins visual feature extraction, otherwise runs the
+ // DoneCallback with a non-phishy verdict.
+ void TermExtractionFinished(bool success);
+
+ // Called to extract the visual features of the current page.
+ void ExtractVisualFeatures();
+
+ // Callback when visual feature extraction is complete.
// If it was successful, computes a score and runs the DoneCallback.
// If extraction was unsuccessful, runs the DoneCallback with a
// non-phishy verdict.
- void TermExtractionFinished(bool success);
-
- // Helper to verify that there is no pending phishing classification. Dies
- // in debug builds if the state is not as expected. This is a no-op in
- // release builds.
- void CheckNoPendingClassification();
+ void VisualExtractionFinished(bool success);
// Helper method to run the DoneCallback and clear the state.
void RunCallback(const ClientPhishingRequest& verdict);
@@ -132,7 +133,6 @@ class PhishingClassifier {
content::RenderFrame* render_frame_; // owns us
const Scorer* scorer_; // owned by the caller
- std::unique_ptr<FeatureExtractorClock> clock_;
std::unique_ptr<PhishingUrlFeatureExtractor> url_extractor_;
std::unique_ptr<PhishingDOMFeatureExtractor> dom_extractor_;
std::unique_ptr<PhishingTermFeatureExtractor> term_extractor_;
@@ -141,6 +141,7 @@ class PhishingClassifier {
std::unique_ptr<FeatureMap> features_;
std::unique_ptr<std::set<uint32_t>> shingle_hashes_;
const base::string16* page_text_; // owned by the caller
+ std::unique_ptr<SkBitmap> bitmap_;
DoneCallback done_callback_;
// Used in scheduling BeginFeatureExtraction tasks.
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc b/chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
index 2600269b203..484958e30db 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
@@ -12,9 +12,9 @@
#include "base/run_loop.h"
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
+#include "base/test/test_discardable_memory_allocator.h"
#include "chrome/renderer/chrome_content_renderer_client.h"
#include "chrome/renderer/safe_browsing/features.h"
-#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/murmurhash3_util.h"
#include "chrome/renderer/safe_browsing/scorer.h"
#include "chrome/test/base/chrome_render_view_test.h"
@@ -60,6 +60,8 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
ChromeRenderViewTest::SetUp();
PrepareModel();
SetUpClassifier();
+
+ base::DiscardableMemoryAllocator::SetInstance(&test_allocator_);
}
void PrepareModel() {
@@ -99,18 +101,16 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
model.set_max_shingles_per_page(100);
model.set_shingle_size(3);
- clock_ = new MockFeatureExtractorClock;
+ // Add an empty visual target to ensure visual detection runs.
+ model.mutable_vision_model()->add_targets();
+
scorer_.reset(Scorer::Create(model.SerializeAsString()));
ASSERT_TRUE(scorer_.get());
-
- // These tests don't exercise the extraction timing.
- EXPECT_CALL(*clock_, Now())
- .WillRepeatedly(::testing::Return(base::TimeTicks::Now()));
}
void SetUpClassifier() {
- classifier_.reset(
- new PhishingClassifier(view_->GetMainRenderFrame(), clock_));
+ classifier_ =
+ std::make_unique<PhishingClassifier>(view_->GetMainRenderFrame());
// No scorer yet, so the classifier is not ready.
ASSERT_FALSE(classifier_->is_ready());
@@ -138,6 +138,8 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
verdict.feature_map(i).value());
}
is_phishing_ = verdict.is_phishing();
+ screenshot_phash_ = verdict.screenshot_phash();
+ phash_dimension_size_ = verdict.phash_dimension_size();
}
void LoadHtml(const GURL& url, const std::string& content) {
@@ -153,7 +155,6 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
std::string response_content_;
std::unique_ptr<Scorer> scorer_;
std::unique_ptr<PhishingClassifier> classifier_;
- MockFeatureExtractorClock* clock_; // Owned by classifier_.
// Features that are in the model.
const std::string url_tld_token_net_;
@@ -165,6 +166,11 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
FeatureMap feature_map_;
float phishy_score_;
bool is_phishing_;
+ std::string screenshot_phash_;
+ int phash_dimension_size_;
+
+ // A DiscardableMemoryAllocator is needed for certain Skia operations.
+ base::TestDiscardableMemoryAllocator test_allocator_;
};
TEST_F(PhishingClassifierTest, TestClassificationOfPhishingDotComHttp) {
@@ -261,6 +267,15 @@ TEST_F(PhishingClassifierTest, DisableDetection) {
EXPECT_FALSE(classifier_->is_ready());
}
+TEST_F(PhishingClassifierTest, TestSendsVisualHash) {
+ LoadHtml(GURL("https://host.net"),
+ "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
+ RunPhishingClassifier(&page_text_);
+
+ EXPECT_EQ(phash_dimension_size_, 48);
+ EXPECT_FALSE(screenshot_phash_.empty());
+}
+
// TODO(jialiul): Add test to verify that classification only starts on GET
// method. It seems there is no easy way to simulate a HTTP POST in
// ChromeRenderViewTest.
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
index 0afae4ea172..b65c048fd82 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
@@ -12,10 +12,8 @@
#include "base/callback.h"
#include "base/debug/stack_trace.h"
#include "base/lazy_instance.h"
-#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/no_destructor.h"
-#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/phishing_classifier.h"
#include "chrome/renderer/safe_browsing/scorer.h"
#include "components/safe_browsing/content/common/safe_browsing.mojom-forward.h"
@@ -54,42 +52,6 @@ base::LazyInstance<std::unique_ptr<const safe_browsing::Scorer>>::
} // namespace
-// static
-void PhishingClassifierFilter::Create(
- mojo::PendingReceiver<mojom::PhishingModelSetter> receiver) {
- mojo::MakeSelfOwnedReceiver(std::make_unique<PhishingClassifierFilter>(),
- std::move(receiver));
-}
-
-PhishingClassifierFilter::PhishingClassifierFilter() {}
-
-PhishingClassifierFilter::~PhishingClassifierFilter() {}
-
-void PhishingClassifierFilter::SetPhishingModel(const std::string& model) {
- safe_browsing::Scorer* scorer = NULL;
- // An empty model string means we should disable client-side phishing
- // detection.
- if (!model.empty()) {
- scorer = safe_browsing::Scorer::Create(model);
- if (!scorer) {
- DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?";
- return;
- }
- }
- for (auto* delegate : PhishingClassifierDelegates())
- delegate->SetPhishingScorer(scorer);
- g_phishing_scorer.Get().reset(scorer);
-}
-
-// static
-PhishingClassifierDelegate* PhishingClassifierDelegate::Create(
- content::RenderFrame* render_frame,
- PhishingClassifier* classifier) {
- // Private constructor and public static Create() method to facilitate
- // stubbing out this class for binary-size reduction purposes.
- return new PhishingClassifierDelegate(render_frame, classifier);
-}
-
PhishingClassifierDelegate::PhishingClassifierDelegate(
content::RenderFrame* render_frame,
PhishingClassifier* classifier)
@@ -99,8 +61,7 @@ PhishingClassifierDelegate::PhishingClassifierDelegate(
is_classifying_(false) {
PhishingClassifierDelegates().insert(this);
if (!classifier) {
- classifier =
- new PhishingClassifier(render_frame, new FeatureExtractorClock());
+ classifier = new PhishingClassifier(render_frame);
}
classifier_.reset(classifier);
@@ -118,6 +79,29 @@ PhishingClassifierDelegate::~PhishingClassifierDelegate() {
PhishingClassifierDelegates().erase(this);
}
+void PhishingClassifierDelegate::SetPhishingModel(const std::string& model) {
+ safe_browsing::Scorer* scorer = nullptr;
+ // An empty model string means we should disable client-side phishing
+ // detection.
+ if (!model.empty()) {
+ scorer = safe_browsing::Scorer::Create(model);
+ if (!scorer)
+ return;
+ }
+ for (auto* delegate : PhishingClassifierDelegates())
+ delegate->SetPhishingScorer(scorer);
+ g_phishing_scorer.Get().reset(scorer);
+}
+
+// static
+PhishingClassifierDelegate* PhishingClassifierDelegate::Create(
+ content::RenderFrame* render_frame,
+ PhishingClassifier* classifier) {
+ // Private constructor and public static Create() method to facilitate
+ // stubbing out this class for binary-size reduction purposes.
+ return new PhishingClassifierDelegate(render_frame, classifier);
+}
+
void PhishingClassifierDelegate::SetPhishingScorer(
const safe_browsing::Scorer* scorer) {
if (is_classifying_) {
@@ -159,23 +143,22 @@ void PhishingClassifierDelegate::StartPhishingDetection(
}
void PhishingClassifierDelegate::DidCommitProvisionalLoad(
- bool is_same_document_navigation,
ui::PageTransition transition) {
blink::WebLocalFrame* frame = render_frame()->GetWebFrame();
// A new page is starting to load, so cancel classificaiton.
- //
+ CancelPendingClassification(NAVIGATE_AWAY);
+ if (!frame->Parent())
+ last_main_frame_transition_ = transition;
+}
+
+void PhishingClassifierDelegate::DidFinishSameDocumentNavigation() {
// TODO(bryner): We shouldn't need to cancel classification if the navigation
// is within the same document. However, if we let classification continue in
// this case, we need to properly deal with the fact that PageCaptured will
// be called again for the same-document navigation. We need to be sure not
// to swap out the page text while the term feature extractor is still
// running.
- CancelPendingClassification(is_same_document_navigation ? NAVIGATE_WITHIN_PAGE
- : NAVIGATE_AWAY);
- if (frame->Parent())
- return;
-
- last_main_frame_transition_ = transition;
+ CancelPendingClassification(NAVIGATE_WITHIN_PAGE);
}
void PhishingClassifierDelegate::PageCaptured(base::string16* page_text,
@@ -196,8 +179,8 @@ void PhishingClassifierDelegate::PageCaptured(base::string16* page_text,
have_page_text_ = true;
GURL stripped_last_load_url(StripRef(last_finished_load_url_));
+ // Check if toplevel URL has changed.
if (stripped_last_load_url == StripRef(last_url_sent_to_classifier_)) {
- DVLOG(2) << "Toplevel URL is unchanged, not starting classification.";
return;
}
@@ -225,8 +208,6 @@ void PhishingClassifierDelegate::CancelPendingClassification(
void PhishingClassifierDelegate::ClassificationDone(
const ClientPhishingRequest& verdict) {
- DVLOG(2) << "Phishy verdict = " << verdict.is_phishing()
- << " score = " << verdict.client_score();
is_phishing_detection_running_ = false;
if (callback_.is_null())
return;
@@ -255,7 +236,6 @@ void PhishingClassifierDelegate::MaybeStartClassification() {
// classified at all (as opposed to deferring it until we get an IPC or
// the load completes), we discard the page text since it won't be needed.
if (!classifier_->is_ready()) {
- DVLOG(2) << "Not starting classification, no Scorer created.";
is_phishing_detection_running_ = false;
// Keep classifier_page_text_, in case a Scorer is set later.
if (!callback_.is_null())
@@ -268,7 +248,6 @@ void PhishingClassifierDelegate::MaybeStartClassification() {
// Skip loads from session history navigation. However, update the
// last URL sent to the classifier, so that we'll properly detect
// same-document navigations.
- DVLOG(2) << "Not starting classification for back/forward navigation";
last_url_sent_to_classifier_ = last_finished_load_url_;
classifier_page_text_.clear(); // we won't need this.
have_page_text_ = false;
@@ -281,7 +260,6 @@ void PhishingClassifierDelegate::MaybeStartClassification() {
GURL stripped_last_load_url(StripRef(last_finished_load_url_));
if (!have_page_text_) {
- DVLOG(2) << "Not starting classification, there is no page text ready.";
RecordEvent(SBPhishingClassifierEvent::kPageTextNotLoaded);
return;
}
@@ -292,15 +270,11 @@ void PhishingClassifierDelegate::MaybeStartClassification() {
// so defer classification for now. Note: the ref does not affect
// any of the browser's preclassification checks, so we don't require it
// to match.
- DVLOG(2) << "Not starting classification, last url from browser is "
- << last_url_received_from_browser_ << ", last finished load is "
- << last_finished_load_url_;
// Keep classifier_page_text_, in case the browser notifies us later that
// we should classify the URL.
return;
}
- DVLOG(2) << "Starting classification for " << last_finished_load_url_;
last_url_sent_to_classifier_ = last_finished_load_url_;
is_classifying_ = true;
classifier_->BeginClassification(
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
index 9910fc79509..80aa1193e95 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
+++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h
@@ -38,21 +38,6 @@ enum class SBPhishingClassifierEvent {
kMaxValue = kDestructedBeforeClassificationDone,
};
-class PhishingClassifierFilter : public mojom::PhishingModelSetter {
- public:
- PhishingClassifierFilter();
- ~PhishingClassifierFilter() override;
-
- static void Create(
- mojo::PendingReceiver<mojom::PhishingModelSetter> receiver);
-
- private:
- // mojom::PhishingModelSetter
- void SetPhishingModel(const std::string& model) override;
-
- DISALLOW_COPY_AND_ASSIGN(PhishingClassifierFilter);
-};
-
class PhishingClassifierDelegate : public content::RenderFrameObserver,
public mojom::PhishingDetector {
public:
@@ -63,6 +48,9 @@ class PhishingClassifierDelegate : public content::RenderFrameObserver,
PhishingClassifier* classifier);
~PhishingClassifierDelegate() override;
+ // mojom::PhishingDetector
+ void SetPhishingModel(const std::string& model) override;
+
// Called by the RenderFrame once there is a phishing scorer available.
// The scorer is passed on to the classifier.
void SetPhishingScorer(const safe_browsing::Scorer* scorer);
@@ -78,10 +66,11 @@ class PhishingClassifierDelegate : public content::RenderFrameObserver,
// Called by the RenderFrame when a page has started loading in the given
// WebFrame. Typically, this will cause any pending classification to be
- // cancelled. However, if the navigation is within the same page, we
- // continue running the current classification.
- void DidCommitProvisionalLoad(bool is_same_document_navigation,
- ui::PageTransition transition) override;
+ // cancelled.
+ void DidCommitProvisionalLoad(ui::PageTransition transition) override;
+ // Called by the RenderFrame when the same-document navigation has been
+ // committed. We continue running the current classification.
+ void DidFinishSameDocumentNavigation() override;
private:
friend class PhishingClassifierDelegateTest;
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
index 2acec9d451c..0ae43da87c5 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc
@@ -37,7 +37,7 @@ namespace {
class MockPhishingClassifier : public PhishingClassifier {
public:
explicit MockPhishingClassifier(content::RenderFrame* render_frame)
- : PhishingClassifier(render_frame, NULL /* clock */) {}
+ : PhishingClassifier(render_frame) {}
~MockPhishingClassifier() override {}
@@ -229,6 +229,25 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) {
EXPECT_CALL(*classifier_, CancelPendingClassification());
}
+TEST_F(PhishingClassifierDelegateTest, NoPhishingModel) {
+ ASSERT_FALSE(classifier_->is_ready());
+ delegate_->SetPhishingModel("");
+ // The scorer is nullptr so the classifier should still not be ready.
+ ASSERT_FALSE(classifier_->is_ready());
+}
+
+TEST_F(PhishingClassifierDelegateTest, HasPhishingModel) {
+ ASSERT_FALSE(classifier_->is_ready());
+
+ ClientSideModel model;
+ model.set_max_words_per_term(1);
+ delegate_->SetPhishingModel(model.SerializeAsString());
+ ASSERT_TRUE(classifier_->is_ready());
+
+ // The delegate will cancel pending classification on destruction.
+ EXPECT_CALL(*classifier_, CancelPendingClassification());
+}
+
TEST_F(PhishingClassifierDelegateTest, NoScorer) {
// For this test, we'll create the delegate with no scorer available yet.
ASSERT_FALSE(classifier_->is_ready());
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
index f1a9fb37d82..c31a97320cb 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
@@ -9,13 +9,12 @@
#include "base/bind.h"
#include "base/compiler_specific.h"
#include "base/location.h"
-#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/single_thread_task_runner.h"
#include "base/strings/string_util.h"
#include "base/threading/thread_task_runner_handle.h"
+#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
-#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "content/public/renderer/render_view.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
@@ -102,16 +101,17 @@ struct PhishingDOMFeatureExtractor::FrameData {
std::string domain;
};
-PhishingDOMFeatureExtractor::PhishingDOMFeatureExtractor(
- FeatureExtractorClock* clock)
- : clock_(clock) {
+PhishingDOMFeatureExtractor::PhishingDOMFeatureExtractor()
+ : clock_(base::DefaultTickClock::GetInstance()) {
Clear();
}
PhishingDOMFeatureExtractor::~PhishingDOMFeatureExtractor() {
// The RenderView should have called CancelPendingExtraction() before
// we are destroyed.
- CheckNoPendingExtraction();
+ DCHECK(done_callback_.is_null());
+ DCHECK(!cur_frame_data_.get());
+ DCHECK(cur_document_.IsNull());
}
void PhishingDOMFeatureExtractor::ExtractFeatures(blink::WebDocument document,
@@ -119,7 +119,9 @@ void PhishingDOMFeatureExtractor::ExtractFeatures(blink::WebDocument document,
DoneCallback done_callback) {
// The RenderView should have called CancelPendingExtraction() before
// starting a new extraction, so DCHECK this.
- CheckNoPendingExtraction();
+ DCHECK(done_callback_.is_null());
+ DCHECK(!cur_frame_data_.get());
+ DCHECK(cur_document_.IsNull());
// However, in an opt build, we will go ahead and clean up the pending
// extraction so that we can start in a known state.
CancelPendingExtraction();
@@ -127,7 +129,7 @@ void PhishingDOMFeatureExtractor::ExtractFeatures(blink::WebDocument document,
features_ = features;
done_callback_ = std::move(done_callback);
- page_feature_state_.reset(new PageFeatureState(clock_->Now()));
+ page_feature_state_ = std::make_unique<PageFeatureState>(clock_->NowTicks());
cur_document_ = document;
base::ThreadTaskRunnerHandle::Get()->PostTask(
@@ -145,7 +147,7 @@ void PhishingDOMFeatureExtractor::CancelPendingExtraction() {
void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
DCHECK(page_feature_state_.get());
++page_feature_state_->num_iterations;
- base::TimeTicks current_chunk_start_time = clock_->Now();
+ base::TimeTicks current_chunk_start_time = clock_->NowTicks();
if (cur_document_.IsNull()) {
// This will only happen if we weren't able to get the document for the
@@ -166,7 +168,7 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
// modified between our chunks of work. Log how long this takes, so we
// can tell if it's too slow.
UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureResumeTime",
- clock_->Now() - current_chunk_start_time);
+ clock_->NowTicks() - current_chunk_start_time);
} else {
// We just moved to a new frame, so update our frame state
// and advance to the first element.
@@ -190,10 +192,9 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
if (++num_elements >= kClockCheckGranularity) {
num_elements = 0;
- base::TimeTicks now = clock_->Now();
+ base::TimeTicks now = clock_->NowTicks();
if (now - page_feature_state_->start_time >=
base::TimeDelta::FromMilliseconds(kMaxTotalTimeMs)) {
- DLOG(ERROR) << "Feature extraction took too long, giving up";
// We expect this to happen infrequently, so record when it does.
UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.DOMFeatureTimeout", 1);
RunCallback(false);
@@ -233,20 +234,16 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
void PhishingDOMFeatureExtractor::HandleLink(
const blink::WebElement& element) {
// Count the number of times we link to a different host.
- if (!element.HasAttribute("href")) {
- DVLOG(1) << "Skipping anchor tag with no href";
+ if (!element.HasAttribute("href"))
return;
- }
// Retrieve the link and resolve the link in case it's relative.
blink::WebURL full_url = CompleteURL(element, element.GetAttribute("href"));
std::string domain;
bool is_external = IsExternalDomain(full_url, &domain);
- if (domain.empty()) {
- DVLOG(1) << "Could not extract domain from link: " << full_url;
+ if (domain.empty())
return;
- }
if (is_external) {
++page_feature_state_->external_links;
@@ -279,10 +276,8 @@ void PhishingDOMFeatureExtractor::HandleForm(
std::string domain;
bool is_external = IsExternalDomain(full_url, &domain);
- if (domain.empty()) {
- DVLOG(1) << "Could not extract domain from form action: " << full_url;
+ if (domain.empty())
return;
- }
if (is_external) {
++page_feature_state_->action_other_domain;
@@ -292,22 +287,16 @@ void PhishingDOMFeatureExtractor::HandleForm(
void PhishingDOMFeatureExtractor::HandleImage(
const blink::WebElement& element) {
- if (!element.HasAttribute("src")) {
- DVLOG(1) << "Skipping img tag with no src";
- }
-
// Record whether the image points to a different domain.
blink::WebURL full_url = CompleteURL(element, element.GetAttribute("src"));
std::string domain;
bool is_external = IsExternalDomain(full_url, &domain);
- if (domain.empty()) {
- DVLOG(1) << "Could not extract domain from image src: " << full_url;
+ if (domain.empty())
return;
- }
- if (is_external) {
+ if (is_external)
++page_feature_state_->img_other_domain;
- }
+
++page_feature_state_->total_imgs;
}
@@ -340,17 +329,6 @@ void PhishingDOMFeatureExtractor::HandleScript(
++page_feature_state_->num_script_tags;
}
-void PhishingDOMFeatureExtractor::CheckNoPendingExtraction() {
- DCHECK(done_callback_.is_null());
- DCHECK(!cur_frame_data_.get());
- DCHECK(cur_document_.IsNull());
- if (!done_callback_.is_null() || cur_frame_data_.get() ||
- !cur_document_.IsNull()) {
- LOG(ERROR) << "Extraction in progress, missing call to "
- << "CancelPendingExtraction";
- }
-}
-
void PhishingDOMFeatureExtractor::RunCallback(bool success) {
// Record some timing stats that we can use to evaluate feature extraction
// performance. These include both successful and failed extractions.
@@ -358,7 +336,7 @@ void PhishingDOMFeatureExtractor::RunCallback(bool success) {
UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.DOMFeatureIterations",
page_feature_state_->num_iterations);
UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureTotalTime",
- clock_->Now() - page_feature_state_->start_time);
+ clock_->NowTicks() - page_feature_state_->start_time);
DCHECK(!done_callback_.is_null());
std::move(done_callback_).Run(success);
@@ -366,9 +344,9 @@ void PhishingDOMFeatureExtractor::RunCallback(bool success) {
}
void PhishingDOMFeatureExtractor::Clear() {
- features_ = NULL;
+ features_ = nullptr;
done_callback_.Reset();
- cur_frame_data_.reset(NULL);
+ cur_frame_data_.reset(nullptr);
cur_document_.Reset();
}
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
index 6f4e2c1503c..4639f620f2c 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
+++ b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
@@ -16,6 +16,7 @@
#include "base/callback.h"
#include "base/macros.h"
#include "base/memory/weak_ptr.h"
+#include "base/time/tick_clock.h"
#include "third_party/blink/public/web/web_document.h"
class GURL;
@@ -25,7 +26,6 @@ class WebElement;
}
namespace safe_browsing {
-class FeatureExtractorClock;
class FeatureMap;
class PhishingDOMFeatureExtractor {
@@ -35,9 +35,7 @@ class PhishingDOMFeatureExtractor {
typedef base::OnceCallback<void(bool)> DoneCallback;
// Creates a PhishingDOMFeatureExtractor instance.
- // |clock| is used for timing feature extractor operations, and may be
- // mocked for testing. The caller maintains ownership of the clock.
- explicit PhishingDOMFeatureExtractor(FeatureExtractorClock* clock);
+ PhishingDOMFeatureExtractor();
virtual ~PhishingDOMFeatureExtractor();
// Begins extracting features into the given FeatureMap for the page.
@@ -55,6 +53,8 @@ class PhishingDOMFeatureExtractor {
// is unloaded or the PhishingDOMFeatureExtractor is destroyed.
void CancelPendingExtraction();
+ void SetTickClockForTesting(const base::TickClock* clock) { clock_ = clock; }
+
private:
struct FrameData;
struct PageFeatureState;
@@ -88,11 +88,6 @@ class PhishingDOMFeatureExtractor {
void HandleInput(const blink::WebElement& element);
void HandleScript(const blink::WebElement& element);
- // Helper to verify that there is no pending feature extraction. Dies in
- // debug builds if the state is not as expected. This is a no-op in release
- // builds.
- void CheckNoPendingExtraction();
-
// Runs |done_callback_| and then clears all internal state.
void RunCallback(bool success);
@@ -122,9 +117,7 @@ class PhishingDOMFeatureExtractor {
// description of which features are computed.
void InsertFeatures();
-
- // Non-owned pointer to our clock.
- FeatureExtractorClock* clock_;
+ const base::TickClock* clock_;
// The output parameters from the most recent call to ExtractFeatures().
FeatureMap* features_; // The caller keeps ownership of this.
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc
index e64fd770af3..0586f5a9a5a 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc
@@ -13,7 +13,6 @@
#include "base/time/time.h"
#include "chrome/renderer/chrome_content_renderer_client.h"
#include "chrome/renderer/safe_browsing/features.h"
-#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/test_utils.h"
#include "chrome/test/base/chrome_render_view_test.h"
#include "content/public/common/content_switches.h"
@@ -30,22 +29,28 @@
#include "third_party/blink/public/web/web_script_source.h"
#include "ui/native_theme/native_theme_features.h"
+using blink::WebRuntimeFeatures;
using ::testing::DoAll;
using ::testing::Invoke;
using ::testing::Return;
-using blink::WebRuntimeFeatures;
+using ::testing::StrictMock;
namespace safe_browsing {
+class MockTickClock : public base::TickClock {
+ public:
+ MockTickClock() = default;
+ ~MockTickClock() override = default;
+
+ MOCK_CONST_METHOD0(NowTicks, base::TimeTicks());
+};
+
// TestPhishingDOMFeatureExtractor has nearly identical behavior as
// PhishingDOMFeatureExtractor, except the IsExternalDomain() and
// CompleteURL() functions. This is to work around the fact that
// ChromeRenderViewTest object does not know where the html content is hosted.
class TestPhishingDOMFeatureExtractor : public PhishingDOMFeatureExtractor {
public:
- explicit TestPhishingDOMFeatureExtractor(FeatureExtractorClock* clock)
- : PhishingDOMFeatureExtractor(clock) {}
-
void SetDocumentDomain(std::string domain) { base_domain_ = domain; }
void SetURLToFrameDomainCheckingMap(
@@ -106,9 +111,6 @@ class TestPhishingDOMFeatureExtractor : public PhishingDOMFeatureExtractor {
const std::string frame_domain = it->second;
full_url = GURL("http://" + it->second).Resolve(partial_url.Utf8());
url_to_frame_domain_map_[full_url.spec()] = it->second;
- } else {
- NOTREACHED() << "Testing input setup is incorrect. "
- "Please check url_to_frame_domain_map_ setup.";
}
}
return blink::WebURL(full_url);
@@ -191,7 +193,7 @@ class PhishingDOMFeatureExtractorTest : public ChromeRenderViewTest {
ChromeRenderViewTest::SetUp();
WebRuntimeFeatures::EnableOverlayScrollbars(
ui::IsOverlayScrollbarEnabled());
- extractor_.reset(new TestPhishingDOMFeatureExtractor(&clock_));
+ extractor_ = std::make_unique<TestPhishingDOMFeatureExtractor>();
}
void TearDown() override {
@@ -218,7 +220,6 @@ class PhishingDOMFeatureExtractorTest : public ChromeRenderViewTest {
"document.body.removeChild(document.getElementById('frame1'));"));
}
- MockFeatureExtractorClock clock_;
bool success_;
std::unique_ptr<TestPhishingDOMFeatureExtractor> extractor_;
scoped_refptr<content::MessageLoopRunner> message_loop_;
@@ -226,9 +227,6 @@ class PhishingDOMFeatureExtractorTest : public ChromeRenderViewTest {
};
TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
- // This test doesn't exercise the extraction timing.
- EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
-
FeatureMap expected_features;
expected_features.AddBooleanFeature(features::kPageHasForms);
expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
@@ -296,9 +294,6 @@ TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
}
TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
- // This test doesn't exercise the extraction timing.
- EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
-
FeatureMap expected_features;
expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
@@ -334,9 +329,6 @@ TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
}
TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) {
- // This test doesn't exercise the extraction timing.
- EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
-
FeatureMap expected_features;
expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
@@ -370,10 +362,6 @@ TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) {
// iframe2 / \ iframe1
// \ iframe3
TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
- // This test doesn't exercise the extraction timing.
- // Test that features are aggregated across all frames.
- EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
-
const char urlprefix[] = "data:text/html;charset=utf-8,";
std::unordered_map<std::string, std::string> url_iframe_map;
std::string iframe1_nested_html(
@@ -443,8 +431,7 @@ TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
}
TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
- // For this test, we'll cause the feature extraction to run multiple
- // iterations by incrementing the clock.
+ StrictMock<MockTickClock> tick_clock;
// This page has a total of 50 elements. For the external forms feature to
// be computed correctly, the extractor has to examine the whole document.
@@ -462,7 +449,7 @@ TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
// Note that this assumes kClockCheckGranularity = 10 and
// kMaxTimePerChunkMs = 10.
base::TimeTicks now = base::TimeTicks::Now();
- EXPECT_CALL(clock_, Now())
+ EXPECT_CALL(tick_clock, NowTicks())
// Time check at the start of extraction.
.WillOnce(Return(now))
// Time check at the start of the first chunk of work.
@@ -489,6 +476,7 @@ TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
.WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54)))
// A final time check for the histograms.
.WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56)));
+ extractor_->SetTickClockForTesting(&tick_clock);
FeatureMap expected_features;
expected_features.AddBooleanFeature(features::kPageHasForms);
@@ -502,13 +490,13 @@ TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
ExtractFeatures("host.com", html, &features);
ExpectFeatureMapsAreEqual(features, expected_features);
// Make sure none of the mock expectations carry over to the next test.
- ::testing::Mock::VerifyAndClearExpectations(&clock_);
+ ::testing::Mock::VerifyAndClearExpectations(&tick_clock);
// Now repeat the test with the same page, but advance the clock faster so
// that the extraction time exceeds the maximum total time for the feature
// extractor. Extraction should fail. Note that this assumes
// kMaxTotalTimeMs = 500.
- EXPECT_CALL(clock_, Now())
+ EXPECT_CALL(tick_clock, NowTicks())
// Time check at the start of extraction.
.WillOnce(Return(now))
// Time check at the start of the first chunk of work.
@@ -542,7 +530,8 @@ TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
GURL iframe1_url(urlprefix + iframe1_html);
base::TimeTicks now = base::TimeTicks::Now();
- EXPECT_CALL(clock_, Now())
+ StrictMock<MockTickClock> tick_clock;
+ EXPECT_CALL(tick_clock, NowTicks())
// Time check at the start of extraction.
.WillOnce(Return(now))
// Time check at the start of the first chunk of work.
@@ -559,6 +548,7 @@ TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
.WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27)))
// A final time check for the histograms.
.WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33)));
+ extractor_->SetTickClockForTesting(&tick_clock);
FeatureMap expected_features;
expected_features.AddBooleanFeature(features::kPageHasForms);
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc
index 2bea4a2ea43..61d045735cb 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc
@@ -15,13 +15,12 @@
#include "base/i18n/break_iterator.h"
#include "base/i18n/case_conversion.h"
#include "base/location.h"
-#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/single_thread_task_runner.h"
#include "base/strings/utf_string_conversions.h"
#include "base/threading/thread_task_runner_handle.h"
+#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
-#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/murmurhash3_util.h"
#include "crypto/sha2.h"
@@ -74,11 +73,8 @@ struct PhishingTermFeatureExtractor::ExtractionState {
std::unique_ptr<base::i18n::BreakIterator> i(new base::i18n::BreakIterator(
text, base::i18n::BreakIterator::BREAK_WORD));
- if (i->Init()) {
+ if (i->Init())
iterator = std::move(i);
- } else {
- DLOG(ERROR) << "failed to open iterator";
- }
}
};
@@ -88,22 +84,22 @@ PhishingTermFeatureExtractor::PhishingTermFeatureExtractor(
size_t max_words_per_term,
uint32_t murmurhash3_seed,
size_t max_shingles_per_page,
- size_t shingle_size,
- FeatureExtractorClock* clock)
+ size_t shingle_size)
: page_term_hashes_(page_term_hashes),
page_word_hashes_(page_word_hashes),
max_words_per_term_(max_words_per_term),
murmurhash3_seed_(murmurhash3_seed),
max_shingles_per_page_(max_shingles_per_page),
shingle_size_(shingle_size),
- clock_(clock) {
+ clock_(base::DefaultTickClock::GetInstance()) {
Clear();
}
PhishingTermFeatureExtractor::~PhishingTermFeatureExtractor() {
// The RenderView should have called CancelPendingExtraction() before
// we are destroyed.
- CheckNoPendingExtraction();
+ DCHECK(done_callback_.is_null());
+ DCHECK(!state_.get());
}
void PhishingTermFeatureExtractor::ExtractFeatures(
@@ -113,7 +109,8 @@ void PhishingTermFeatureExtractor::ExtractFeatures(
DoneCallback done_callback) {
// The RenderView should have called CancelPendingExtraction() before
// starting a new extraction, so DCHECK this.
- CheckNoPendingExtraction();
+ DCHECK(done_callback_.is_null());
+ DCHECK(!state_.get());
// However, in an opt build, we will go ahead and clean up the pending
// extraction so that we can start in a known state.
CancelPendingExtraction();
@@ -122,7 +119,7 @@ void PhishingTermFeatureExtractor::ExtractFeatures(
features_ = features;
shingle_hashes_ = shingle_hashes, done_callback_ = std::move(done_callback);
- state_.reset(new ExtractionState(*page_text_, clock_->Now()));
+ state_ = std::make_unique<ExtractionState>(*page_text_, clock_->NowTicks());
base::ThreadTaskRunnerHandle::Get()->PostTask(
FROM_HERE,
base::BindOnce(&PhishingTermFeatureExtractor::ExtractFeaturesWithTimeout,
@@ -138,7 +135,7 @@ void PhishingTermFeatureExtractor::CancelPendingExtraction() {
void PhishingTermFeatureExtractor::ExtractFeaturesWithTimeout() {
DCHECK(state_.get());
++state_->num_iterations;
- base::TimeTicks current_chunk_start_time = clock_->Now();
+ base::TimeTicks current_chunk_start_time = clock_->NowTicks();
if (!state_->iterator.get()) {
// We failed to initialize the break iterator, so stop now.
@@ -158,10 +155,9 @@ void PhishingTermFeatureExtractor::ExtractFeaturesWithTimeout() {
if (num_words >= kClockCheckGranularity) {
num_words = 0;
- base::TimeTicks now = clock_->Now();
+ base::TimeTicks now = clock_->NowTicks();
if (now - state_->start_time >=
base::TimeDelta::FromMilliseconds(kMaxTotalTimeMs)) {
- DLOG(ERROR) << "Feature extraction took too long, giving up";
// We expect this to happen infrequently, so record when it does.
UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.TermFeatureTimeout", 1);
RunCallback(false);
@@ -261,15 +257,6 @@ void PhishingTermFeatureExtractor::HandleWord(
}
}
-void PhishingTermFeatureExtractor::CheckNoPendingExtraction() {
- DCHECK(done_callback_.is_null());
- DCHECK(!state_.get());
- if (!done_callback_.is_null() || state_.get()) {
- LOG(ERROR) << "Extraction in progress, missing call to "
- << "CancelPendingExtraction";
- }
-}
-
void PhishingTermFeatureExtractor::RunCallback(bool success) {
// Record some timing stats that we can use to evaluate feature extraction
// performance. These include both successful and failed extractions.
@@ -277,7 +264,7 @@ void PhishingTermFeatureExtractor::RunCallback(bool success) {
UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.TermFeatureIterations",
state_->num_iterations);
UMA_HISTOGRAM_TIMES("SBClientPhishing.TermFeatureTotalTime",
- clock_->Now() - state_->start_time);
+ clock_->NowTicks() - state_->start_time);
DCHECK(!done_callback_.is_null());
std::move(done_callback_).Run(success);
@@ -285,11 +272,11 @@ void PhishingTermFeatureExtractor::RunCallback(bool success) {
}
void PhishingTermFeatureExtractor::Clear() {
- page_text_ = NULL;
- features_ = NULL;
- shingle_hashes_ = NULL;
+ page_text_ = nullptr;
+ features_ = nullptr;
+ shingle_hashes_ = nullptr;
done_callback_.Reset();
- state_.reset(NULL);
+ state_.reset(nullptr);
}
} // namespace safe_browsing
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h
index d0cc48d7033..341f60704a2 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h
+++ b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h
@@ -29,9 +29,9 @@
#include "base/memory/weak_ptr.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"
+#include "base/time/tick_clock.h"
namespace safe_browsing {
-class FeatureExtractorClock;
class FeatureMap;
class PhishingTermFeatureExtractor {
@@ -63,8 +63,7 @@ class PhishingTermFeatureExtractor {
size_t max_words_per_term,
uint32_t murmurhash3_seed,
size_t max_shingles_per_page,
- size_t shingle_size,
- FeatureExtractorClock* clock);
+ size_t shingle_size);
~PhishingTermFeatureExtractor();
// Begins extracting features from |page_text| into the given FeatureMap.
@@ -90,6 +89,8 @@ class PhishingTermFeatureExtractor {
// is unloaded or the PhishingTermFeatureExtractor is destroyed.
void CancelPendingExtraction();
+ void SetTickClockForTesting(const base::TickClock* clock) { clock_ = clock; }
+
private:
struct ExtractionState;
@@ -115,11 +116,6 @@ class PhishingTermFeatureExtractor {
// Handles a single word in the page text.
void HandleWord(const base::StringPiece16& word);
- // Helper to verify that there is no pending feature extraction. Dies in
- // debug builds if the state is not as expected. This is a no-op in release
- // builds.
- void CheckNoPendingExtraction();
-
// Runs |done_callback_| and then clears all internal state.
void RunCallback(bool success);
@@ -149,7 +145,7 @@ class PhishingTermFeatureExtractor {
const size_t shingle_size_;
// Non-owned pointer to our clock.
- FeatureExtractorClock* clock_;
+ const base::TickClock* clock_;
// The output parameters from the most recent call to ExtractFeatures().
const base::string16* page_text_; // The caller keeps ownership of this.
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc
index c1b38303cbd..53fc9a643fb 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc
@@ -24,7 +24,6 @@
#include "base/time/time.h"
#include "build/build_config.h"
#include "chrome/renderer/safe_browsing/features.h"
-#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/murmurhash3_util.h"
#include "chrome/renderer/safe_browsing/test_utils.h"
#include "crypto/sha2.h"
@@ -33,11 +32,20 @@
using base::ASCIIToUTF16;
using ::testing::Return;
+using ::testing::StrictMock;
static const uint32_t kMurmurHash3Seed = 2777808611U;
namespace safe_browsing {
+class MockTickClock : public base::TickClock {
+ public:
+ MockTickClock() = default;
+ ~MockTickClock() override = default;
+
+ MOCK_CONST_METHOD0(NowTicks, base::TimeTicks());
+};
+
class PhishingTermFeatureExtractorTest : public ::testing::Test {
protected:
void SetUp() override {
@@ -80,14 +88,9 @@ class PhishingTermFeatureExtractorTest : public ::testing::Test {
}
void ResetExtractor(size_t max_shingles_per_page) {
- extractor_.reset(new PhishingTermFeatureExtractor(
- &term_hashes_,
- &word_hashes_,
- 3 /* max_words_per_term */,
- kMurmurHash3Seed,
- max_shingles_per_page,
- 4 /* shingle_size */,
- &clock_));
+ extractor_ = std::make_unique<PhishingTermFeatureExtractor>(
+ &term_hashes_, &word_hashes_, 3 /* max_words_per_term */,
+ kMurmurHash3Seed, max_shingles_per_page, 4 /* shingle_size */);
}
// Runs the TermFeatureExtractor on |page_text|, waiting for the
@@ -133,7 +136,6 @@ class PhishingTermFeatureExtractorTest : public ::testing::Test {
base::test::SingleThreadTaskEnvironment task_environment_;
std::unique_ptr<base::RunLoop> active_run_loop_;
- MockFeatureExtractorClock clock_;
std::unique_ptr<PhishingTermFeatureExtractor> extractor_;
std::unordered_set<std::string> term_hashes_;
std::unordered_set<uint32_t> word_hashes_;
@@ -141,9 +143,6 @@ class PhishingTermFeatureExtractorTest : public ::testing::Test {
};
TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) {
- // This test doesn't exercise the extraction timing.
- EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
-
base::string16 page_text = ASCIIToUTF16("blah");
FeatureMap expected_features; // initially empty
std::set<uint32_t> expected_shingle_hashes;
@@ -306,7 +305,8 @@ TEST_F(PhishingTermFeatureExtractorTest, Continuation) {
// Note that this assumes kClockCheckGranularity = 5 and
// kMaxTimePerChunkMs = 10.
base::TimeTicks now = base::TimeTicks::Now();
- EXPECT_CALL(clock_, Now())
+ StrictMock<MockTickClock> tick_clock;
+ EXPECT_CALL(tick_clock, NowTicks())
// Time check at the start of extraction.
.WillOnce(Return(now))
// Time check at the start of the first chunk of work.
@@ -328,6 +328,7 @@ TEST_F(PhishingTermFeatureExtractorTest, Continuation) {
.WillOnce(Return(now + base::TimeDelta::FromMilliseconds(28)))
// A final check for the histograms.
.WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)));
+ extractor_->SetTickClockForTesting(&tick_clock);
FeatureMap expected_features;
expected_features.AddBooleanFeature(features::kPageTerm +
@@ -396,13 +397,13 @@ TEST_F(PhishingTermFeatureExtractorTest, Continuation) {
ExpectFeatureMapsAreEqual(features, expected_features);
EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes));
// Make sure none of the mock expectations carry over to the next test.
- ::testing::Mock::VerifyAndClearExpectations(&clock_);
+ ::testing::Mock::VerifyAndClearExpectations(&tick_clock);
// Now repeat the test with the same text, but advance the clock faster so
// that the extraction time exceeds the maximum total time for the feature
// extractor. Extraction should fail. Note that this assumes
// kMaxTotalTimeMs = 500.
- EXPECT_CALL(clock_, Now())
+ EXPECT_CALL(tick_clock, NowTicks())
// Time check at the start of extraction.
.WillOnce(Return(now))
// Time check at the start of the first chunk of work.
@@ -429,7 +430,8 @@ TEST_F(PhishingTermFeatureExtractorTest, PartialExtractionTest) {
}
base::TimeTicks now = base::TimeTicks::Now();
- EXPECT_CALL(clock_, Now())
+ StrictMock<MockTickClock> tick_clock;
+ EXPECT_CALL(tick_clock, NowTicks())
// Time check at the start of extraction.
.WillOnce(Return(now))
// Time check at the start of the first chunk of work.
@@ -439,6 +441,7 @@ TEST_F(PhishingTermFeatureExtractorTest, PartialExtractionTest) {
// Time check after the next 5 words. This should be greater than
// kMaxTimePerChunkMs so that we stop and schedule extraction for later.
.WillOnce(Return(now + base::TimeDelta::FromMilliseconds(14)));
+ extractor_->SetTickClockForTesting(&tick_clock);
FeatureMap features;
std::set<uint32_t> shingle_hashes;
@@ -454,7 +457,8 @@ TEST_F(PhishingTermFeatureExtractorTest, PartialExtractionTest) {
shingle_hashes.clear();
// This part doesn't exercise the extraction timing.
- EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
+ EXPECT_CALL(tick_clock, NowTicks())
+ .WillRepeatedly(Return(base::TimeTicks::Now()));
// Now extract normally and make sure nothing breaks.
EXPECT_TRUE(ExtractFeatures(page_text.get(), &features, &shingle_hashes));
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc
index 931c5203edd..403d1d060f7 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc
@@ -8,7 +8,6 @@
#include <string>
#include <vector>
-#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
@@ -44,8 +43,8 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url,
host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
+ // Check if TLD exists for host.
if (registry_length == 0 || registry_length == std::string::npos) {
- DVLOG(1) << "Could not find TLD for host: " << host;
return false;
}
DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but "
@@ -59,8 +58,8 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url,
host.erase(tld_start - 1);
std::vector<std::string> host_tokens = base::SplitString(
host, ".", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
+ // Check if domain exists for host.
if (host_tokens.empty()) {
- DVLOG(1) << "Could not find domain for host: " << host;
return false;
}
if (!features->AddBooleanFeature(features::kUrlDomainToken +
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc
index e5412a7bd4e..c87ab3daf88 100644
--- a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc
+++ b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc
@@ -6,6 +6,8 @@
#include <string>
#include <vector>
+#include "base/format_macros.h"
+#include "base/strings/stringprintf.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "chrome/renderer/safe_browsing/test_utils.h"
#include "testing/gmock/include/gmock/gmock.h"
@@ -25,10 +27,24 @@ class PhishingUrlFeatureExtractorTest : public ::testing::Test {
PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens(full,
tokens);
}
+
+ void FillFeatureMap(size_t count, FeatureMap* features) {
+ for (size_t i = 0; i < count; ++i) {
+ EXPECT_TRUE(
+ features->AddBooleanFeature(base::StringPrintf("Feature%" PRIuS, i)));
+ }
+ }
};
TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
std::string url = "http://123.0.0.1/mydocuments/a.file.html";
+ FeatureMap features;
+
+ // If feature map is already full, features cannot be extracted.
+ FillFeatureMap(FeatureMap::kMaxFeatureMapSize, &features);
+ ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
+ features.Clear();
+
FeatureMap expected_features;
expected_features.AddBooleanFeature(features::kUrlHostIsIpAddress);
expected_features.AddBooleanFeature(features::kUrlPathToken +
@@ -38,9 +54,13 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
expected_features.AddBooleanFeature(features::kUrlPathToken +
std::string("html"));
- FeatureMap features;
ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
ExpectFeatureMapsAreEqual(features, expected_features);
+ // If feature map is already full, features cannot be extracted.
+ features.Clear();
+ FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 1, &features);
+ ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
+ features.Clear();
url = "http://www.www.cnn.co.uk/sports/sports/index.html?shouldnotappear";
expected_features.Clear();
@@ -61,6 +81,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
features.Clear();
ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
ExpectFeatureMapsAreEqual(features, expected_features);
+ features.Clear();
+ // If feature map is already full, features cannot be extracted.
+ FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 5, &features);
+ ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
url = "http://justadomain.com/";
expected_features.Clear();
@@ -72,6 +96,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
features.Clear();
ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
ExpectFeatureMapsAreEqual(features, expected_features);
+ // If feature map is already full, features cannot be extracted.
+ features.Clear();
+ FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 1, &features);
+ ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
url = "http://witharef.com/#abc";
expected_features.Clear();
@@ -96,6 +124,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
features.Clear();
ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features));
ExpectFeatureMapsAreEqual(features, expected_features);
+ // If feature map is already full, features cannot be extracted.
+ features.Clear();
+ FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 2, &features);
+ ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
url = "http://unrecognized.tld/";
EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features));
diff --git a/chromium/chrome/renderer/safe_browsing/scorer.cc b/chromium/chrome/renderer/safe_browsing/scorer.cc
index 8e37a934f9a..fc592db98c5 100644
--- a/chromium/chrome/renderer/safe_browsing/scorer.cc
+++ b/chromium/chrome/renderer/safe_browsing/scorer.cc
@@ -10,10 +10,10 @@
#include <unordered_map>
#include <unordered_set>
-#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/string_piece.h"
#include "chrome/renderer/safe_browsing/features.h"
+#include "components/safe_browsing/content/password_protection/visual_utils.h"
#include "components/safe_browsing/core/proto/client_model.pb.h"
namespace {
@@ -57,14 +57,12 @@ Scorer::~Scorer() {}
Scorer* Scorer::Create(const base::StringPiece& model_str) {
std::unique_ptr<Scorer> scorer(new Scorer());
ClientSideModel& model = scorer->model_;
+ // Parse the phishing model.
if (!model.ParseFromArray(model_str.data(), model_str.size())) {
- DLOG(ERROR) << "Unable to parse phishing model. This Scorer object is "
- << "invalid.";
RecordScorerCreationStatus(SCORER_FAIL_MODEL_PARSE_ERROR);
return NULL;
} else if (!model.IsInitialized()) {
- DLOG(ERROR) << "Unable to parse phishing model. The model is missing "
- << "some required fields. Maybe the .proto file changed?";
+ // The model may be missing some required fields.
RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS);
return NULL;
}
@@ -86,6 +84,32 @@ double Scorer::ComputeScore(const FeatureMap& features) const {
return LogOdds2Prob(logodds);
}
+bool Scorer::GetMatchingVisualTargets(const SkBitmap& bitmap,
+ ClientPhishingRequest* request) const {
+ bool has_match = false;
+ for (const VisualTarget& target : model_.vision_model().targets()) {
+ base::Optional<VisionMatchResult> result =
+ visual_utils::IsVisualMatch(bitmap, target);
+ if (result.has_value()) {
+ *request->add_vision_match() = result.value();
+ has_match = true;
+ }
+ }
+
+ if (model_.has_vision_model()) {
+ // Populate these fields for telementry purposes. They will be filtered in
+ // the browser process if they are not needed.
+ VisualFeatures::BlurredImage blurred_image;
+ if (visual_utils::GetBlurredImage(bitmap, &blurred_image)) {
+ request->set_screenshot_phash(
+ visual_utils::GetHashFromBlurredImage(blurred_image));
+ request->set_phash_dimension_size(48);
+ }
+ }
+
+ return has_match;
+}
+
int Scorer::model_version() const {
return model_.version();
}
diff --git a/chromium/chrome/renderer/safe_browsing/scorer.h b/chromium/chrome/renderer/safe_browsing/scorer.h
index 5fda83d4817..597b4cf047b 100644
--- a/chromium/chrome/renderer/safe_browsing/scorer.h
+++ b/chromium/chrome/renderer/safe_browsing/scorer.h
@@ -23,6 +23,7 @@
#include "base/macros.h"
#include "base/strings/string_piece.h"
#include "components/safe_browsing/core/proto/client_model.pb.h"
+#include "third_party/skia/include/core/SkBitmap.h"
namespace safe_browsing {
class FeatureMap;
@@ -41,6 +42,11 @@ class Scorer {
// (range is inclusive on both ends).
virtual double ComputeScore(const FeatureMap& features) const;
+ // This method matches the given |bitmap| against the visual model. It returns
+ // true if any visual target matches, and populates |request| appropriately.
+ virtual bool GetMatchingVisualTargets(const SkBitmap& bitmap,
+ ClientPhishingRequest* request) const;
+
// Returns the version number of the loaded client model.
int model_version() const;
diff --git a/chromium/chrome/renderer/safe_browsing/scorer_unittest.cc b/chromium/chrome/renderer/safe_browsing/scorer_unittest.cc
index 9ea41fd1ccc..9083eca05a9 100644
--- a/chromium/chrome/renderer/safe_browsing/scorer_unittest.cc
+++ b/chromium/chrome/renderer/safe_browsing/scorer_unittest.cc
@@ -59,9 +59,39 @@ class PhishingScorerTest : public ::testing::Test {
model_.set_murmur_hash_seed(12345U);
model_.set_max_shingles_per_page(10);
model_.set_shingle_size(3);
+
+ // The first target hash is all 1-bits, except the first 8.
+ std::vector<unsigned char> target_hash;
+ target_hash.push_back('\x30');
+ for (int i = 0; i < 288; i++)
+ target_hash.push_back('\xff');
+ target_hash[1] = '\x00';
+ VisualTarget* target1 = model_.mutable_vision_model()->add_targets();
+ target1->set_digest("target1");
+ target1->set_hash(target_hash.data(), target_hash.size());
+ target1->mutable_match_config()->add_match_rule()->set_hash_distance(8.0);
+
+ // The second target hash is all 1-bits, except the second 8.
+ target_hash[1] = '\xff';
+ target_hash[2] = '\x00';
+ VisualTarget* target2 = model_.mutable_vision_model()->add_targets();
+ target2->set_digest("target2");
+ target2->set_hash(target_hash.data(), target_hash.size());
+ target2->mutable_match_config()->add_match_rule()->set_hash_distance(8.0);
+
+ // Allocate a bitmap for testing visual scoring
+ sk_sp<SkColorSpace> rec2020 = SkColorSpace::MakeRGB(
+ {2.22222f, 0.909672f, 0.0903276f, 0.222222f, 0.0812429f, 0, 0},
+ SkNamedGamut::kRec2020);
+ SkImageInfo bitmap_info =
+ SkImageInfo::Make(1000, 1000, SkColorType::kRGBA_8888_SkColorType,
+ SkAlphaType::kUnpremul_SkAlphaType, rec2020);
+
+ ASSERT_TRUE(bitmap_.tryAllocPixels(bitmap_info));
}
ClientSideModel model_;
+ SkBitmap bitmap_;
};
TEST_F(PhishingScorerTest, HasValidModel) {
@@ -145,4 +175,48 @@ TEST_F(PhishingScorerTest, ComputeScore) {
EXPECT_TRUE(features.AddBooleanFeature("feature2"));
EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));
}
+
+TEST_F(PhishingScorerTest, GetMatchingVisualTargetsMatchOne) {
+ std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
+
+ // Make the whole image white
+ for (int x = 0; x < 1000; x++)
+ for (int y = 0; y < 1000; y++)
+ *bitmap_.getAddr32(x, y) = 0xffffffff;
+
+ // Make the first 164 pixels black. This will make the first 8 bits of the
+ // hash 0.
+ for (int x = 0; x < 164; x++)
+ *bitmap_.getAddr32(x, 0) = 0xff000000;
+
+ ClientPhishingRequest request;
+ scorer->GetMatchingVisualTargets(bitmap_, &request);
+ ASSERT_EQ(request.vision_match_size(), 1);
+ EXPECT_EQ(request.vision_match(0).matched_target_digest(), "target1");
+}
+
+TEST_F(PhishingScorerTest, GetMatchingVisualTargetsMatchBoth) {
+ std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
+
+ // Make the whole image white
+ for (int x = 0; x < 1000; x++)
+ for (int y = 0; y < 1000; y++)
+ *bitmap_.getAddr32(x, y) = 0xffffffff;
+
+ // Create an alternating black/white pattern to match both targets. The
+ // pattern is 84 black pixels, then 84 white, then 84 black, then 84 white.
+ // This causes the hash to start 0F0F, for a distance of 8 from both targets.
+ for (int x = 0; x < 84; x++)
+ *bitmap_.getAddr32(x, 0) = 0xff000000;
+
+ for (int x = 168; x < 248; x++)
+ *bitmap_.getAddr32(x, 0) = 0xff000000;
+
+ ClientPhishingRequest request;
+ scorer->GetMatchingVisualTargets(bitmap_, &request);
+ ASSERT_EQ(request.vision_match_size(), 2);
+ EXPECT_EQ(request.vision_match(0).matched_target_digest(), "target1");
+ EXPECT_EQ(request.vision_match(1).matched_target_digest(), "target2");
+}
+
} // namespace safe_browsing