diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-12 14:27:29 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-10-13 09:35:20 +0000 |
commit | c30a6232df03e1efbd9f3b226777b07e087a1122 (patch) | |
tree | e992f45784689f373bcc38d1b79a239ebe17ee23 /chromium/chrome/renderer/safe_browsing | |
parent | 7b5b123ac58f58ffde0f4f6e488bcd09aa4decd3 (diff) | |
download | qtwebengine-chromium-85-based.tar.gz |
BASELINE: Update Chromium to 85.0.4183.14085-based
Change-Id: Iaa42f4680837c57725b1344f108c0196741f6057
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/chrome/renderer/safe_browsing')
22 files changed, 414 insertions, 389 deletions
diff --git a/chromium/chrome/renderer/safe_browsing/DEPS b/chromium/chrome/renderer/safe_browsing/DEPS index 76dae5d8e5a..6b8537549c7 100644 --- a/chromium/chrome/renderer/safe_browsing/DEPS +++ b/chromium/chrome/renderer/safe_browsing/DEPS @@ -1,9 +1,12 @@ include_rules = [ + "+components/paint_preview/common", "+components/safe_browsing/content/renderer", + "+components/safe_browsing/content/password_protection/visual_utils.h", "+components/safe_browsing/core/common", "+components/safe_browsing/core/proto/csd.pb.h", "+components/safe_browsing/core/proto/client_model.pb.h", "+components/safe_browsing/core/features.h", + "+cc/paint", "+third_party/smhasher", ] diff --git a/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.cc b/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.cc deleted file mode 100644 index 45fbd4d4437..00000000000 --- a/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.cc +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/renderer/safe_browsing/feature_extractor_clock.h" - -namespace safe_browsing { - -FeatureExtractorClock::~FeatureExtractorClock() {} - -base::TimeTicks FeatureExtractorClock::Now() { - return base::TimeTicks::Now(); -} - -} // namespace safe_browsing diff --git a/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.h b/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.h deleted file mode 100644 index 930c1a4b7ab..00000000000 --- a/chromium/chrome/renderer/safe_browsing/feature_extractor_clock.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// A simple abstraction for getting the current time during feature extraction. -// This can be mocked out for testing. - -#ifndef CHROME_RENDERER_SAFE_BROWSING_FEATURE_EXTRACTOR_CLOCK_H_ -#define CHROME_RENDERER_SAFE_BROWSING_FEATURE_EXTRACTOR_CLOCK_H_ - -#include "base/macros.h" -#include "base/time/time.h" - -namespace safe_browsing { - -class FeatureExtractorClock { - public: - FeatureExtractorClock() {} - virtual ~FeatureExtractorClock(); - - // Returns the current time. May be mocked for testing. - virtual base::TimeTicks Now(); - - private: - DISALLOW_COPY_AND_ASSIGN(FeatureExtractorClock); -}; - -} // namespace safe_browsing - -#endif // CHROME_RENDERER_SAFE_BROWSING_FEATURE_EXTRACTOR_CLOCK_H_ diff --git a/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.cc b/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.cc deleted file mode 100644 index 665baf0b5c3..00000000000 --- a/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.cc +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" - -namespace safe_browsing { - -MockFeatureExtractorClock::MockFeatureExtractorClock() - : FeatureExtractorClock() {} - -MockFeatureExtractorClock::~MockFeatureExtractorClock() {} - -} // namespace safe_browsing diff --git a/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.h b/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.h deleted file mode 100644 index 7012ee81090..00000000000 --- a/chromium/chrome/renderer/safe_browsing/mock_feature_extractor_clock.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// A mock implementation of FeatureExtractorClock for testing. - -#ifndef CHROME_RENDERER_SAFE_BROWSING_MOCK_FEATURE_EXTRACTOR_CLOCK_H_ -#define CHROME_RENDERER_SAFE_BROWSING_MOCK_FEATURE_EXTRACTOR_CLOCK_H_ - -#include "base/macros.h" -#include "chrome/renderer/safe_browsing/feature_extractor_clock.h" -#include "testing/gmock/include/gmock/gmock.h" - -namespace safe_browsing { - -class MockFeatureExtractorClock : public FeatureExtractorClock { - public: - MockFeatureExtractorClock(); - ~MockFeatureExtractorClock() override; - - MOCK_METHOD0(Now, base::TimeTicks()); - - private: - DISALLOW_COPY_AND_ASSIGN(MockFeatureExtractorClock); -}; - -} // namespace safe_browsing - -#endif // CHROME_RENDERER_SAFE_BROWSING_MOCK_FEATURE_EXTRACTOR_CLOCK_H_ diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier.cc b/chromium/chrome/renderer/safe_browsing/phishing_classifier.cc index 0842c7ea50a..c53c18d31b5 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_classifier.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier.cc @@ -11,18 +11,18 @@ #include "base/callback.h" #include "base/compiler_specific.h" #include "base/location.h" -#include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "base/single_thread_task_runner.h" #include "base/strings/string_util.h" #include "base/threading/thread_task_runner_handle.h" +#include "cc/paint/skia_paint_canvas.h" #include "chrome/common/url_constants.h" -#include "chrome/renderer/safe_browsing/feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/features.h" #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" #include "chrome/renderer/safe_browsing/phishing_term_feature_extractor.h" #include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h" #include "chrome/renderer/safe_browsing/scorer.h" +#include "components/paint_preview/common/paint_preview_tracker.h" #include "components/safe_browsing/core/proto/csd.pb.h" #include "content/public/renderer/render_frame.h" #include "crypto/sha2.h" @@ -32,6 +32,7 @@ #include "third_party/blink/public/web/web_document_loader.h" #include "third_party/blink/public/web/web_local_frame.h" #include "third_party/blink/public/web/web_view.h" +#include "ui/gfx/geometry/rect_conversions.h" #include "url/gurl.h" namespace safe_browsing { @@ -39,32 +40,29 @@ namespace safe_browsing { const float PhishingClassifier::kInvalidScore = -1.0; const float PhishingClassifier::kPhishyThreshold = 0.5; -PhishingClassifier::PhishingClassifier(content::RenderFrame* render_frame, - FeatureExtractorClock* clock) - : render_frame_(render_frame), scorer_(nullptr), clock_(clock) { +PhishingClassifier::PhishingClassifier(content::RenderFrame* render_frame) + : render_frame_(render_frame), scorer_(nullptr) { Clear(); } PhishingClassifier::~PhishingClassifier() { // The RenderView should have called CancelPendingClassification() before // we are destroyed. - CheckNoPendingClassification(); + DCHECK(done_callback_.is_null()); + DCHECK(!page_text_); } void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) { - CheckNoPendingClassification(); + DCHECK(done_callback_.is_null()); + DCHECK(!page_text_); scorer_ = scorer; if (scorer_) { - url_extractor_.reset(new PhishingUrlFeatureExtractor); - dom_extractor_.reset(new PhishingDOMFeatureExtractor(clock_.get())); - term_extractor_.reset(new PhishingTermFeatureExtractor( - &scorer_->page_terms(), - &scorer_->page_words(), - scorer_->max_words_per_term(), - scorer_->murmurhash3_seed(), - scorer_->max_shingles_per_page(), - scorer_->shingle_size(), - clock_.get())); + url_extractor_ = std::make_unique<PhishingUrlFeatureExtractor>(); + dom_extractor_ = std::make_unique<PhishingDOMFeatureExtractor>(); + term_extractor_ = std::make_unique<PhishingTermFeatureExtractor>( + &scorer_->page_terms(), &scorer_->page_words(), + scorer_->max_words_per_term(), scorer_->murmurhash3_seed(), + scorer_->max_shingles_per_page(), scorer_->shingle_size()); } else { // We're disabling client-side phishing detection, so tear down all // of the relevant objects. @@ -75,7 +73,7 @@ void PhishingClassifier::set_phishing_scorer(const Scorer* scorer) { } bool PhishingClassifier::is_ready() const { - return scorer_ != NULL; + return !!scorer_; } void PhishingClassifier::BeginClassification(const base::string16* page_text, @@ -84,7 +82,8 @@ void PhishingClassifier::BeginClassification(const base::string16* page_text, // The RenderView should have called CancelPendingClassification() before // starting a new classification, so DCHECK this. - CheckNoPendingClassification(); + DCHECK(done_callback_.is_null()); + DCHECK(!page_text_); // However, in an opt build, we will go ahead and clean up the pending // classification so that we can start in a known state. CancelPendingClassification(); @@ -158,41 +157,70 @@ void PhishingClassifier::DOMExtractionFinished(bool success) { void PhishingClassifier::TermExtractionFinished(bool success) { if (success) { - blink::WebLocalFrame* main_frame = render_frame_->GetWebFrame(); - - // Hash all of the features so that they match the model, then compute - // the score. - FeatureMap hashed_features; - ClientPhishingRequest verdict; - verdict.set_model_version(scorer_->model_version()); - verdict.set_url(main_frame->GetDocument().Url().GetString().Utf8()); - for (const auto& it : features_->features()) { - bool result = hashed_features.AddRealFeature( - crypto::SHA256HashString(it.first), it.second); - DCHECK(result); - ClientPhishingRequest::Feature* feature = verdict.add_feature_map(); - feature->set_name(it.first); - feature->set_value(it.second); - } - for (const auto& it : *shingle_hashes_) { - verdict.add_shingle_hashes(it); - } - float score = static_cast<float>(scorer_->ComputeScore(hashed_features)); - verdict.set_client_score(score); - verdict.set_is_phishing(score >= scorer_->threshold_probability()); - RunCallback(verdict); + ExtractVisualFeatures(); } else { RunFailureCallback(); } } -void PhishingClassifier::CheckNoPendingClassification() { - DCHECK(done_callback_.is_null()); - DCHECK(!page_text_); - if (!done_callback_.is_null() || page_text_) { - LOG(ERROR) << "Classification in progress, missing call to " - << "CancelPendingClassification"; +void PhishingClassifier::ExtractVisualFeatures() { + blink::WebLocalFrame* frame = render_frame_->GetWebFrame(); + gfx::SizeF viewport_size = frame->View()->VisualViewportSize(); + gfx::Rect bounds = ToEnclosingRect(gfx::RectF(viewport_size)); + bitmap_ = std::make_unique<SkBitmap>(); + // Use the Rec. 2020 color space, in case the user input is wide-gamut. + sk_sp<SkColorSpace> rec2020 = SkColorSpace::MakeRGB( + {2.22222f, 0.909672f, 0.0903276f, 0.222222f, 0.0812429f, 0, 0}, + SkNamedGamut::kRec2020); + SkImageInfo bitmap_info = SkImageInfo::Make( + bounds.width(), bounds.height(), SkColorType::kRGBA_8888_SkColorType, + SkAlphaType::kUnpremul_SkAlphaType, rec2020); + if (!bitmap_->tryAllocPixels(bitmap_info)) + return VisualExtractionFinished(/*success=*/false); + SkCanvas sk_canvas(*bitmap_); + cc::SkiaPaintCanvas cc_canvas(&sk_canvas); + auto tracker = std::make_unique<paint_preview::PaintPreviewTracker>( + base::UnguessableToken::Create(), frame->GetEmbeddingToken(), + /*is_main_frame=*/true); + cc_canvas.SetPaintPreviewTracker(tracker.get()); + VisualExtractionFinished(frame->CapturePaintPreview( + bounds, &cc_canvas, /*include_linked_destinations=*/false)); +} + +void PhishingClassifier::VisualExtractionFinished(bool success) { + if (!success) { + RunFailureCallback(); + return; + } + + blink::WebLocalFrame* main_frame = render_frame_->GetWebFrame(); + + // Hash all of the features so that they match the model, then compute + // the score. + FeatureMap hashed_features; + ClientPhishingRequest verdict; + verdict.set_model_version(scorer_->model_version()); + verdict.set_url(main_frame->GetDocument().Url().GetString().Utf8()); + for (const auto& it : features_->features()) { + bool result = hashed_features.AddRealFeature( + crypto::SHA256HashString(it.first), it.second); + DCHECK(result); + ClientPhishingRequest::Feature* feature = verdict.add_feature_map(); + feature->set_name(it.first); + feature->set_value(it.second); } + for (const auto& it : *shingle_hashes_) { + verdict.add_shingle_hashes(it); + } + float score = static_cast<float>(scorer_->ComputeScore(hashed_features)); + verdict.set_client_score(score); + verdict.set_is_phishing(score >= scorer_->threshold_probability()); + + if (scorer_->GetMatchingVisualTargets(*bitmap_, &verdict)) { + verdict.set_is_phishing(true); + } + + RunCallback(verdict); } void PhishingClassifier::RunCallback(const ClientPhishingRequest& verdict) { @@ -211,10 +239,11 @@ void PhishingClassifier::RunFailureCallback() { } void PhishingClassifier::Clear() { - page_text_ = NULL; + page_text_ = nullptr; done_callback_.Reset(); - features_.reset(NULL); - shingle_hashes_.reset(NULL); + features_.reset(nullptr); + shingle_hashes_.reset(nullptr); + bitmap_.reset(nullptr); } } // namespace safe_browsing diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier.h b/chromium/chrome/renderer/safe_browsing/phishing_classifier.h index 9528c434cce..401ae402dba 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_classifier.h +++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier.h @@ -27,6 +27,7 @@ #include "base/macros.h" #include "base/memory/weak_ptr.h" #include "base/strings/string16.h" +#include "third_party/skia/include/core/SkBitmap.h" namespace content { class RenderFrame; @@ -34,7 +35,6 @@ class RenderFrame; namespace safe_browsing { class ClientPhishingRequest; -class FeatureExtractorClock; class FeatureMap; class PhishingDOMFeatureExtractor; class PhishingTermFeatureExtractor; @@ -55,11 +55,9 @@ class PhishingClassifier { static const float kInvalidScore; // Creates a new PhishingClassifier object that will operate on - // |render_view|. |clock| is used to time feature extractor operations, and - // the PhishingClassifier takes ownership of this object. Note that the - // classifier will not be 'ready' until set_phishing_scorer() is called. - PhishingClassifier(content::RenderFrame* render_frame, - FeatureExtractorClock* clock); + // |render_view|. Note that the classifier will not be 'ready' until + // set_phishing_scorer() is called. + explicit PhishingClassifier(content::RenderFrame* render_frame); virtual ~PhishingClassifier(); // Sets a scorer for the classifier to use in computing the phishiness score. @@ -110,15 +108,18 @@ class PhishingClassifier { void DOMExtractionFinished(bool success); // Callback to be run when term feature extraction is complete. + // If it was successful, begins visual feature extraction, otherwise runs the + // DoneCallback with a non-phishy verdict. + void TermExtractionFinished(bool success); + + // Called to extract the visual features of the current page. + void ExtractVisualFeatures(); + + // Callback when visual feature extraction is complete. // If it was successful, computes a score and runs the DoneCallback. // If extraction was unsuccessful, runs the DoneCallback with a // non-phishy verdict. - void TermExtractionFinished(bool success); - - // Helper to verify that there is no pending phishing classification. Dies - // in debug builds if the state is not as expected. This is a no-op in - // release builds. - void CheckNoPendingClassification(); + void VisualExtractionFinished(bool success); // Helper method to run the DoneCallback and clear the state. void RunCallback(const ClientPhishingRequest& verdict); @@ -132,7 +133,6 @@ class PhishingClassifier { content::RenderFrame* render_frame_; // owns us const Scorer* scorer_; // owned by the caller - std::unique_ptr<FeatureExtractorClock> clock_; std::unique_ptr<PhishingUrlFeatureExtractor> url_extractor_; std::unique_ptr<PhishingDOMFeatureExtractor> dom_extractor_; std::unique_ptr<PhishingTermFeatureExtractor> term_extractor_; @@ -141,6 +141,7 @@ class PhishingClassifier { std::unique_ptr<FeatureMap> features_; std::unique_ptr<std::set<uint32_t>> shingle_hashes_; const base::string16* page_text_; // owned by the caller + std::unique_ptr<SkBitmap> bitmap_; DoneCallback done_callback_; // Used in scheduling BeginFeatureExtraction tasks. diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc b/chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc index 2600269b203..484958e30db 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc @@ -12,9 +12,9 @@ #include "base/run_loop.h" #include "base/strings/string16.h" #include "base/strings/utf_string_conversions.h" +#include "base/test/test_discardable_memory_allocator.h" #include "chrome/renderer/chrome_content_renderer_client.h" #include "chrome/renderer/safe_browsing/features.h" -#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/murmurhash3_util.h" #include "chrome/renderer/safe_browsing/scorer.h" #include "chrome/test/base/chrome_render_view_test.h" @@ -60,6 +60,8 @@ class PhishingClassifierTest : public ChromeRenderViewTest { ChromeRenderViewTest::SetUp(); PrepareModel(); SetUpClassifier(); + + base::DiscardableMemoryAllocator::SetInstance(&test_allocator_); } void PrepareModel() { @@ -99,18 +101,16 @@ class PhishingClassifierTest : public ChromeRenderViewTest { model.set_max_shingles_per_page(100); model.set_shingle_size(3); - clock_ = new MockFeatureExtractorClock; + // Add an empty visual target to ensure visual detection runs. + model.mutable_vision_model()->add_targets(); + scorer_.reset(Scorer::Create(model.SerializeAsString())); ASSERT_TRUE(scorer_.get()); - - // These tests don't exercise the extraction timing. - EXPECT_CALL(*clock_, Now()) - .WillRepeatedly(::testing::Return(base::TimeTicks::Now())); } void SetUpClassifier() { - classifier_.reset( - new PhishingClassifier(view_->GetMainRenderFrame(), clock_)); + classifier_ = + std::make_unique<PhishingClassifier>(view_->GetMainRenderFrame()); // No scorer yet, so the classifier is not ready. ASSERT_FALSE(classifier_->is_ready()); @@ -138,6 +138,8 @@ class PhishingClassifierTest : public ChromeRenderViewTest { verdict.feature_map(i).value()); } is_phishing_ = verdict.is_phishing(); + screenshot_phash_ = verdict.screenshot_phash(); + phash_dimension_size_ = verdict.phash_dimension_size(); } void LoadHtml(const GURL& url, const std::string& content) { @@ -153,7 +155,6 @@ class PhishingClassifierTest : public ChromeRenderViewTest { std::string response_content_; std::unique_ptr<Scorer> scorer_; std::unique_ptr<PhishingClassifier> classifier_; - MockFeatureExtractorClock* clock_; // Owned by classifier_. // Features that are in the model. const std::string url_tld_token_net_; @@ -165,6 +166,11 @@ class PhishingClassifierTest : public ChromeRenderViewTest { FeatureMap feature_map_; float phishy_score_; bool is_phishing_; + std::string screenshot_phash_; + int phash_dimension_size_; + + // A DiscardableMemoryAllocator is needed for certain Skia operations. + base::TestDiscardableMemoryAllocator test_allocator_; }; TEST_F(PhishingClassifierTest, TestClassificationOfPhishingDotComHttp) { @@ -261,6 +267,15 @@ TEST_F(PhishingClassifierTest, DisableDetection) { EXPECT_FALSE(classifier_->is_ready()); } +TEST_F(PhishingClassifierTest, TestSendsVisualHash) { + LoadHtml(GURL("https://host.net"), + "<html><body><a href=\"http://safe.com/\">login</a></body></html>"); + RunPhishingClassifier(&page_text_); + + EXPECT_EQ(phash_dimension_size_, 48); + EXPECT_FALSE(screenshot_phash_.empty()); +} + // TODO(jialiul): Add test to verify that classification only starts on GET // method. It seems there is no easy way to simulate a HTTP POST in // ChromeRenderViewTest. diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc index 0afae4ea172..b65c048fd82 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc @@ -12,10 +12,8 @@ #include "base/callback.h" #include "base/debug/stack_trace.h" #include "base/lazy_instance.h" -#include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "base/no_destructor.h" -#include "chrome/renderer/safe_browsing/feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/phishing_classifier.h" #include "chrome/renderer/safe_browsing/scorer.h" #include "components/safe_browsing/content/common/safe_browsing.mojom-forward.h" @@ -54,42 +52,6 @@ base::LazyInstance<std::unique_ptr<const safe_browsing::Scorer>>:: } // namespace -// static -void PhishingClassifierFilter::Create( - mojo::PendingReceiver<mojom::PhishingModelSetter> receiver) { - mojo::MakeSelfOwnedReceiver(std::make_unique<PhishingClassifierFilter>(), - std::move(receiver)); -} - -PhishingClassifierFilter::PhishingClassifierFilter() {} - -PhishingClassifierFilter::~PhishingClassifierFilter() {} - -void PhishingClassifierFilter::SetPhishingModel(const std::string& model) { - safe_browsing::Scorer* scorer = NULL; - // An empty model string means we should disable client-side phishing - // detection. - if (!model.empty()) { - scorer = safe_browsing::Scorer::Create(model); - if (!scorer) { - DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?"; - return; - } - } - for (auto* delegate : PhishingClassifierDelegates()) - delegate->SetPhishingScorer(scorer); - g_phishing_scorer.Get().reset(scorer); -} - -// static -PhishingClassifierDelegate* PhishingClassifierDelegate::Create( - content::RenderFrame* render_frame, - PhishingClassifier* classifier) { - // Private constructor and public static Create() method to facilitate - // stubbing out this class for binary-size reduction purposes. - return new PhishingClassifierDelegate(render_frame, classifier); -} - PhishingClassifierDelegate::PhishingClassifierDelegate( content::RenderFrame* render_frame, PhishingClassifier* classifier) @@ -99,8 +61,7 @@ PhishingClassifierDelegate::PhishingClassifierDelegate( is_classifying_(false) { PhishingClassifierDelegates().insert(this); if (!classifier) { - classifier = - new PhishingClassifier(render_frame, new FeatureExtractorClock()); + classifier = new PhishingClassifier(render_frame); } classifier_.reset(classifier); @@ -118,6 +79,29 @@ PhishingClassifierDelegate::~PhishingClassifierDelegate() { PhishingClassifierDelegates().erase(this); } +void PhishingClassifierDelegate::SetPhishingModel(const std::string& model) { + safe_browsing::Scorer* scorer = nullptr; + // An empty model string means we should disable client-side phishing + // detection. + if (!model.empty()) { + scorer = safe_browsing::Scorer::Create(model); + if (!scorer) + return; + } + for (auto* delegate : PhishingClassifierDelegates()) + delegate->SetPhishingScorer(scorer); + g_phishing_scorer.Get().reset(scorer); +} + +// static +PhishingClassifierDelegate* PhishingClassifierDelegate::Create( + content::RenderFrame* render_frame, + PhishingClassifier* classifier) { + // Private constructor and public static Create() method to facilitate + // stubbing out this class for binary-size reduction purposes. + return new PhishingClassifierDelegate(render_frame, classifier); +} + void PhishingClassifierDelegate::SetPhishingScorer( const safe_browsing::Scorer* scorer) { if (is_classifying_) { @@ -159,23 +143,22 @@ void PhishingClassifierDelegate::StartPhishingDetection( } void PhishingClassifierDelegate::DidCommitProvisionalLoad( - bool is_same_document_navigation, ui::PageTransition transition) { blink::WebLocalFrame* frame = render_frame()->GetWebFrame(); // A new page is starting to load, so cancel classificaiton. - // + CancelPendingClassification(NAVIGATE_AWAY); + if (!frame->Parent()) + last_main_frame_transition_ = transition; +} + +void PhishingClassifierDelegate::DidFinishSameDocumentNavigation() { // TODO(bryner): We shouldn't need to cancel classification if the navigation // is within the same document. However, if we let classification continue in // this case, we need to properly deal with the fact that PageCaptured will // be called again for the same-document navigation. We need to be sure not // to swap out the page text while the term feature extractor is still // running. - CancelPendingClassification(is_same_document_navigation ? NAVIGATE_WITHIN_PAGE - : NAVIGATE_AWAY); - if (frame->Parent()) - return; - - last_main_frame_transition_ = transition; + CancelPendingClassification(NAVIGATE_WITHIN_PAGE); } void PhishingClassifierDelegate::PageCaptured(base::string16* page_text, @@ -196,8 +179,8 @@ void PhishingClassifierDelegate::PageCaptured(base::string16* page_text, have_page_text_ = true; GURL stripped_last_load_url(StripRef(last_finished_load_url_)); + // Check if toplevel URL has changed. if (stripped_last_load_url == StripRef(last_url_sent_to_classifier_)) { - DVLOG(2) << "Toplevel URL is unchanged, not starting classification."; return; } @@ -225,8 +208,6 @@ void PhishingClassifierDelegate::CancelPendingClassification( void PhishingClassifierDelegate::ClassificationDone( const ClientPhishingRequest& verdict) { - DVLOG(2) << "Phishy verdict = " << verdict.is_phishing() - << " score = " << verdict.client_score(); is_phishing_detection_running_ = false; if (callback_.is_null()) return; @@ -255,7 +236,6 @@ void PhishingClassifierDelegate::MaybeStartClassification() { // classified at all (as opposed to deferring it until we get an IPC or // the load completes), we discard the page text since it won't be needed. if (!classifier_->is_ready()) { - DVLOG(2) << "Not starting classification, no Scorer created."; is_phishing_detection_running_ = false; // Keep classifier_page_text_, in case a Scorer is set later. if (!callback_.is_null()) @@ -268,7 +248,6 @@ void PhishingClassifierDelegate::MaybeStartClassification() { // Skip loads from session history navigation. However, update the // last URL sent to the classifier, so that we'll properly detect // same-document navigations. - DVLOG(2) << "Not starting classification for back/forward navigation"; last_url_sent_to_classifier_ = last_finished_load_url_; classifier_page_text_.clear(); // we won't need this. have_page_text_ = false; @@ -281,7 +260,6 @@ void PhishingClassifierDelegate::MaybeStartClassification() { GURL stripped_last_load_url(StripRef(last_finished_load_url_)); if (!have_page_text_) { - DVLOG(2) << "Not starting classification, there is no page text ready."; RecordEvent(SBPhishingClassifierEvent::kPageTextNotLoaded); return; } @@ -292,15 +270,11 @@ void PhishingClassifierDelegate::MaybeStartClassification() { // so defer classification for now. Note: the ref does not affect // any of the browser's preclassification checks, so we don't require it // to match. - DVLOG(2) << "Not starting classification, last url from browser is " - << last_url_received_from_browser_ << ", last finished load is " - << last_finished_load_url_; // Keep classifier_page_text_, in case the browser notifies us later that // we should classify the URL. return; } - DVLOG(2) << "Starting classification for " << last_finished_load_url_; last_url_sent_to_classifier_ = last_finished_load_url_; is_classifying_ = true; classifier_->BeginClassification( diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h index 9910fc79509..80aa1193e95 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h +++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate.h @@ -38,21 +38,6 @@ enum class SBPhishingClassifierEvent { kMaxValue = kDestructedBeforeClassificationDone, }; -class PhishingClassifierFilter : public mojom::PhishingModelSetter { - public: - PhishingClassifierFilter(); - ~PhishingClassifierFilter() override; - - static void Create( - mojo::PendingReceiver<mojom::PhishingModelSetter> receiver); - - private: - // mojom::PhishingModelSetter - void SetPhishingModel(const std::string& model) override; - - DISALLOW_COPY_AND_ASSIGN(PhishingClassifierFilter); -}; - class PhishingClassifierDelegate : public content::RenderFrameObserver, public mojom::PhishingDetector { public: @@ -63,6 +48,9 @@ class PhishingClassifierDelegate : public content::RenderFrameObserver, PhishingClassifier* classifier); ~PhishingClassifierDelegate() override; + // mojom::PhishingDetector + void SetPhishingModel(const std::string& model) override; + // Called by the RenderFrame once there is a phishing scorer available. // The scorer is passed on to the classifier. void SetPhishingScorer(const safe_browsing::Scorer* scorer); @@ -78,10 +66,11 @@ class PhishingClassifierDelegate : public content::RenderFrameObserver, // Called by the RenderFrame when a page has started loading in the given // WebFrame. Typically, this will cause any pending classification to be - // cancelled. However, if the navigation is within the same page, we - // continue running the current classification. - void DidCommitProvisionalLoad(bool is_same_document_navigation, - ui::PageTransition transition) override; + // cancelled. + void DidCommitProvisionalLoad(ui::PageTransition transition) override; + // Called by the RenderFrame when the same-document navigation has been + // committed. We continue running the current classification. + void DidFinishSameDocumentNavigation() override; private: friend class PhishingClassifierDelegateTest; diff --git a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc index 2acec9d451c..0ae43da87c5 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc @@ -37,7 +37,7 @@ namespace { class MockPhishingClassifier : public PhishingClassifier { public: explicit MockPhishingClassifier(content::RenderFrame* render_frame) - : PhishingClassifier(render_frame, NULL /* clock */) {} + : PhishingClassifier(render_frame) {} ~MockPhishingClassifier() override {} @@ -229,6 +229,25 @@ TEST_F(PhishingClassifierDelegateTest, Navigation) { EXPECT_CALL(*classifier_, CancelPendingClassification()); } +TEST_F(PhishingClassifierDelegateTest, NoPhishingModel) { + ASSERT_FALSE(classifier_->is_ready()); + delegate_->SetPhishingModel(""); + // The scorer is nullptr so the classifier should still not be ready. + ASSERT_FALSE(classifier_->is_ready()); +} + +TEST_F(PhishingClassifierDelegateTest, HasPhishingModel) { + ASSERT_FALSE(classifier_->is_ready()); + + ClientSideModel model; + model.set_max_words_per_term(1); + delegate_->SetPhishingModel(model.SerializeAsString()); + ASSERT_TRUE(classifier_->is_ready()); + + // The delegate will cancel pending classification on destruction. + EXPECT_CALL(*classifier_, CancelPendingClassification()); +} + TEST_F(PhishingClassifierDelegateTest, NoScorer) { // For this test, we'll create the delegate with no scorer available yet. ASSERT_FALSE(classifier_->is_ready()); diff --git a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc index f1a9fb37d82..c31a97320cb 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc @@ -9,13 +9,12 @@ #include "base/bind.h" #include "base/compiler_specific.h" #include "base/location.h" -#include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "base/single_thread_task_runner.h" #include "base/strings/string_util.h" #include "base/threading/thread_task_runner_handle.h" +#include "base/time/default_tick_clock.h" #include "base/time/time.h" -#include "chrome/renderer/safe_browsing/feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/features.h" #include "content/public/renderer/render_view.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" @@ -102,16 +101,17 @@ struct PhishingDOMFeatureExtractor::FrameData { std::string domain; }; -PhishingDOMFeatureExtractor::PhishingDOMFeatureExtractor( - FeatureExtractorClock* clock) - : clock_(clock) { +PhishingDOMFeatureExtractor::PhishingDOMFeatureExtractor() + : clock_(base::DefaultTickClock::GetInstance()) { Clear(); } PhishingDOMFeatureExtractor::~PhishingDOMFeatureExtractor() { // The RenderView should have called CancelPendingExtraction() before // we are destroyed. - CheckNoPendingExtraction(); + DCHECK(done_callback_.is_null()); + DCHECK(!cur_frame_data_.get()); + DCHECK(cur_document_.IsNull()); } void PhishingDOMFeatureExtractor::ExtractFeatures(blink::WebDocument document, @@ -119,7 +119,9 @@ void PhishingDOMFeatureExtractor::ExtractFeatures(blink::WebDocument document, DoneCallback done_callback) { // The RenderView should have called CancelPendingExtraction() before // starting a new extraction, so DCHECK this. - CheckNoPendingExtraction(); + DCHECK(done_callback_.is_null()); + DCHECK(!cur_frame_data_.get()); + DCHECK(cur_document_.IsNull()); // However, in an opt build, we will go ahead and clean up the pending // extraction so that we can start in a known state. CancelPendingExtraction(); @@ -127,7 +129,7 @@ void PhishingDOMFeatureExtractor::ExtractFeatures(blink::WebDocument document, features_ = features; done_callback_ = std::move(done_callback); - page_feature_state_.reset(new PageFeatureState(clock_->Now())); + page_feature_state_ = std::make_unique<PageFeatureState>(clock_->NowTicks()); cur_document_ = document; base::ThreadTaskRunnerHandle::Get()->PostTask( @@ -145,7 +147,7 @@ void PhishingDOMFeatureExtractor::CancelPendingExtraction() { void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() { DCHECK(page_feature_state_.get()); ++page_feature_state_->num_iterations; - base::TimeTicks current_chunk_start_time = clock_->Now(); + base::TimeTicks current_chunk_start_time = clock_->NowTicks(); if (cur_document_.IsNull()) { // This will only happen if we weren't able to get the document for the @@ -166,7 +168,7 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() { // modified between our chunks of work. Log how long this takes, so we // can tell if it's too slow. UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureResumeTime", - clock_->Now() - current_chunk_start_time); + clock_->NowTicks() - current_chunk_start_time); } else { // We just moved to a new frame, so update our frame state // and advance to the first element. @@ -190,10 +192,9 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() { if (++num_elements >= kClockCheckGranularity) { num_elements = 0; - base::TimeTicks now = clock_->Now(); + base::TimeTicks now = clock_->NowTicks(); if (now - page_feature_state_->start_time >= base::TimeDelta::FromMilliseconds(kMaxTotalTimeMs)) { - DLOG(ERROR) << "Feature extraction took too long, giving up"; // We expect this to happen infrequently, so record when it does. UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.DOMFeatureTimeout", 1); RunCallback(false); @@ -233,20 +234,16 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() { void PhishingDOMFeatureExtractor::HandleLink( const blink::WebElement& element) { // Count the number of times we link to a different host. - if (!element.HasAttribute("href")) { - DVLOG(1) << "Skipping anchor tag with no href"; + if (!element.HasAttribute("href")) return; - } // Retrieve the link and resolve the link in case it's relative. blink::WebURL full_url = CompleteURL(element, element.GetAttribute("href")); std::string domain; bool is_external = IsExternalDomain(full_url, &domain); - if (domain.empty()) { - DVLOG(1) << "Could not extract domain from link: " << full_url; + if (domain.empty()) return; - } if (is_external) { ++page_feature_state_->external_links; @@ -279,10 +276,8 @@ void PhishingDOMFeatureExtractor::HandleForm( std::string domain; bool is_external = IsExternalDomain(full_url, &domain); - if (domain.empty()) { - DVLOG(1) << "Could not extract domain from form action: " << full_url; + if (domain.empty()) return; - } if (is_external) { ++page_feature_state_->action_other_domain; @@ -292,22 +287,16 @@ void PhishingDOMFeatureExtractor::HandleForm( void PhishingDOMFeatureExtractor::HandleImage( const blink::WebElement& element) { - if (!element.HasAttribute("src")) { - DVLOG(1) << "Skipping img tag with no src"; - } - // Record whether the image points to a different domain. blink::WebURL full_url = CompleteURL(element, element.GetAttribute("src")); std::string domain; bool is_external = IsExternalDomain(full_url, &domain); - if (domain.empty()) { - DVLOG(1) << "Could not extract domain from image src: " << full_url; + if (domain.empty()) return; - } - if (is_external) { + if (is_external) ++page_feature_state_->img_other_domain; - } + ++page_feature_state_->total_imgs; } @@ -340,17 +329,6 @@ void PhishingDOMFeatureExtractor::HandleScript( ++page_feature_state_->num_script_tags; } -void PhishingDOMFeatureExtractor::CheckNoPendingExtraction() { - DCHECK(done_callback_.is_null()); - DCHECK(!cur_frame_data_.get()); - DCHECK(cur_document_.IsNull()); - if (!done_callback_.is_null() || cur_frame_data_.get() || - !cur_document_.IsNull()) { - LOG(ERROR) << "Extraction in progress, missing call to " - << "CancelPendingExtraction"; - } -} - void PhishingDOMFeatureExtractor::RunCallback(bool success) { // Record some timing stats that we can use to evaluate feature extraction // performance. These include both successful and failed extractions. @@ -358,7 +336,7 @@ void PhishingDOMFeatureExtractor::RunCallback(bool success) { UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.DOMFeatureIterations", page_feature_state_->num_iterations); UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureTotalTime", - clock_->Now() - page_feature_state_->start_time); + clock_->NowTicks() - page_feature_state_->start_time); DCHECK(!done_callback_.is_null()); std::move(done_callback_).Run(success); @@ -366,9 +344,9 @@ void PhishingDOMFeatureExtractor::RunCallback(bool success) { } void PhishingDOMFeatureExtractor::Clear() { - features_ = NULL; + features_ = nullptr; done_callback_.Reset(); - cur_frame_data_.reset(NULL); + cur_frame_data_.reset(nullptr); cur_document_.Reset(); } diff --git a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h index 6f4e2c1503c..4639f620f2c 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h +++ b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h @@ -16,6 +16,7 @@ #include "base/callback.h" #include "base/macros.h" #include "base/memory/weak_ptr.h" +#include "base/time/tick_clock.h" #include "third_party/blink/public/web/web_document.h" class GURL; @@ -25,7 +26,6 @@ class WebElement; } namespace safe_browsing { -class FeatureExtractorClock; class FeatureMap; class PhishingDOMFeatureExtractor { @@ -35,9 +35,7 @@ class PhishingDOMFeatureExtractor { typedef base::OnceCallback<void(bool)> DoneCallback; // Creates a PhishingDOMFeatureExtractor instance. - // |clock| is used for timing feature extractor operations, and may be - // mocked for testing. The caller maintains ownership of the clock. - explicit PhishingDOMFeatureExtractor(FeatureExtractorClock* clock); + PhishingDOMFeatureExtractor(); virtual ~PhishingDOMFeatureExtractor(); // Begins extracting features into the given FeatureMap for the page. @@ -55,6 +53,8 @@ class PhishingDOMFeatureExtractor { // is unloaded or the PhishingDOMFeatureExtractor is destroyed. void CancelPendingExtraction(); + void SetTickClockForTesting(const base::TickClock* clock) { clock_ = clock; } + private: struct FrameData; struct PageFeatureState; @@ -88,11 +88,6 @@ class PhishingDOMFeatureExtractor { void HandleInput(const blink::WebElement& element); void HandleScript(const blink::WebElement& element); - // Helper to verify that there is no pending feature extraction. Dies in - // debug builds if the state is not as expected. This is a no-op in release - // builds. - void CheckNoPendingExtraction(); - // Runs |done_callback_| and then clears all internal state. void RunCallback(bool success); @@ -122,9 +117,7 @@ class PhishingDOMFeatureExtractor { // description of which features are computed. void InsertFeatures(); - - // Non-owned pointer to our clock. - FeatureExtractorClock* clock_; + const base::TickClock* clock_; // The output parameters from the most recent call to ExtractFeatures(). FeatureMap* features_; // The caller keeps ownership of this. diff --git a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc index e64fd770af3..0586f5a9a5a 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_browsertest.cc @@ -13,7 +13,6 @@ #include "base/time/time.h" #include "chrome/renderer/chrome_content_renderer_client.h" #include "chrome/renderer/safe_browsing/features.h" -#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/test_utils.h" #include "chrome/test/base/chrome_render_view_test.h" #include "content/public/common/content_switches.h" @@ -30,22 +29,28 @@ #include "third_party/blink/public/web/web_script_source.h" #include "ui/native_theme/native_theme_features.h" +using blink::WebRuntimeFeatures; using ::testing::DoAll; using ::testing::Invoke; using ::testing::Return; -using blink::WebRuntimeFeatures; +using ::testing::StrictMock; namespace safe_browsing { +class MockTickClock : public base::TickClock { + public: + MockTickClock() = default; + ~MockTickClock() override = default; + + MOCK_CONST_METHOD0(NowTicks, base::TimeTicks()); +}; + // TestPhishingDOMFeatureExtractor has nearly identical behavior as // PhishingDOMFeatureExtractor, except the IsExternalDomain() and // CompleteURL() functions. This is to work around the fact that // ChromeRenderViewTest object does not know where the html content is hosted. class TestPhishingDOMFeatureExtractor : public PhishingDOMFeatureExtractor { public: - explicit TestPhishingDOMFeatureExtractor(FeatureExtractorClock* clock) - : PhishingDOMFeatureExtractor(clock) {} - void SetDocumentDomain(std::string domain) { base_domain_ = domain; } void SetURLToFrameDomainCheckingMap( @@ -106,9 +111,6 @@ class TestPhishingDOMFeatureExtractor : public PhishingDOMFeatureExtractor { const std::string frame_domain = it->second; full_url = GURL("http://" + it->second).Resolve(partial_url.Utf8()); url_to_frame_domain_map_[full_url.spec()] = it->second; - } else { - NOTREACHED() << "Testing input setup is incorrect. " - "Please check url_to_frame_domain_map_ setup."; } } return blink::WebURL(full_url); @@ -191,7 +193,7 @@ class PhishingDOMFeatureExtractorTest : public ChromeRenderViewTest { ChromeRenderViewTest::SetUp(); WebRuntimeFeatures::EnableOverlayScrollbars( ui::IsOverlayScrollbarEnabled()); - extractor_.reset(new TestPhishingDOMFeatureExtractor(&clock_)); + extractor_ = std::make_unique<TestPhishingDOMFeatureExtractor>(); } void TearDown() override { @@ -218,7 +220,6 @@ class PhishingDOMFeatureExtractorTest : public ChromeRenderViewTest { "document.body.removeChild(document.getElementById('frame1'));")); } - MockFeatureExtractorClock clock_; bool success_; std::unique_ptr<TestPhishingDOMFeatureExtractor> extractor_; scoped_refptr<content::MessageLoopRunner> message_loop_; @@ -226,9 +227,6 @@ class PhishingDOMFeatureExtractorTest : public ChromeRenderViewTest { }; TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) { - // This test doesn't exercise the extraction timing. - EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); - FeatureMap expected_features; expected_features.AddBooleanFeature(features::kPageHasForms); expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25); @@ -296,9 +294,6 @@ TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) { } TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) { - // This test doesn't exercise the extraction timing. - EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); - FeatureMap expected_features; expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5); expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0); @@ -334,9 +329,6 @@ TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) { } TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) { - // This test doesn't exercise the extraction timing. - EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); - FeatureMap expected_features; expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne); @@ -370,10 +362,6 @@ TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) { // iframe2 / \ iframe1 // \ iframe3 TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) { - // This test doesn't exercise the extraction timing. - // Test that features are aggregated across all frames. - EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); - const char urlprefix[] = "data:text/html;charset=utf-8,"; std::unordered_map<std::string, std::string> url_iframe_map; std::string iframe1_nested_html( @@ -443,8 +431,7 @@ TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) { } TEST_F(PhishingDOMFeatureExtractorTest, Continuation) { - // For this test, we'll cause the feature extraction to run multiple - // iterations by incrementing the clock. + StrictMock<MockTickClock> tick_clock; // This page has a total of 50 elements. For the external forms feature to // be computed correctly, the extractor has to examine the whole document. @@ -462,7 +449,7 @@ TEST_F(PhishingDOMFeatureExtractorTest, Continuation) { // Note that this assumes kClockCheckGranularity = 10 and // kMaxTimePerChunkMs = 10. base::TimeTicks now = base::TimeTicks::Now(); - EXPECT_CALL(clock_, Now()) + EXPECT_CALL(tick_clock, NowTicks()) // Time check at the start of extraction. .WillOnce(Return(now)) // Time check at the start of the first chunk of work. @@ -489,6 +476,7 @@ TEST_F(PhishingDOMFeatureExtractorTest, Continuation) { .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54))) // A final time check for the histograms. .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56))); + extractor_->SetTickClockForTesting(&tick_clock); FeatureMap expected_features; expected_features.AddBooleanFeature(features::kPageHasForms); @@ -502,13 +490,13 @@ TEST_F(PhishingDOMFeatureExtractorTest, Continuation) { ExtractFeatures("host.com", html, &features); ExpectFeatureMapsAreEqual(features, expected_features); // Make sure none of the mock expectations carry over to the next test. - ::testing::Mock::VerifyAndClearExpectations(&clock_); + ::testing::Mock::VerifyAndClearExpectations(&tick_clock); // Now repeat the test with the same page, but advance the clock faster so // that the extraction time exceeds the maximum total time for the feature // extractor. Extraction should fail. Note that this assumes // kMaxTotalTimeMs = 500. - EXPECT_CALL(clock_, Now()) + EXPECT_CALL(tick_clock, NowTicks()) // Time check at the start of extraction. .WillOnce(Return(now)) // Time check at the start of the first chunk of work. @@ -542,7 +530,8 @@ TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) { GURL iframe1_url(urlprefix + iframe1_html); base::TimeTicks now = base::TimeTicks::Now(); - EXPECT_CALL(clock_, Now()) + StrictMock<MockTickClock> tick_clock; + EXPECT_CALL(tick_clock, NowTicks()) // Time check at the start of extraction. .WillOnce(Return(now)) // Time check at the start of the first chunk of work. @@ -559,6 +548,7 @@ TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) { .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27))) // A final time check for the histograms. .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33))); + extractor_->SetTickClockForTesting(&tick_clock); FeatureMap expected_features; expected_features.AddBooleanFeature(features::kPageHasForms); diff --git a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc index 2bea4a2ea43..61d045735cb 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.cc @@ -15,13 +15,12 @@ #include "base/i18n/break_iterator.h" #include "base/i18n/case_conversion.h" #include "base/location.h" -#include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "base/single_thread_task_runner.h" #include "base/strings/utf_string_conversions.h" #include "base/threading/thread_task_runner_handle.h" +#include "base/time/default_tick_clock.h" #include "base/time/time.h" -#include "chrome/renderer/safe_browsing/feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/features.h" #include "chrome/renderer/safe_browsing/murmurhash3_util.h" #include "crypto/sha2.h" @@ -74,11 +73,8 @@ struct PhishingTermFeatureExtractor::ExtractionState { std::unique_ptr<base::i18n::BreakIterator> i(new base::i18n::BreakIterator( text, base::i18n::BreakIterator::BREAK_WORD)); - if (i->Init()) { + if (i->Init()) iterator = std::move(i); - } else { - DLOG(ERROR) << "failed to open iterator"; - } } }; @@ -88,22 +84,22 @@ PhishingTermFeatureExtractor::PhishingTermFeatureExtractor( size_t max_words_per_term, uint32_t murmurhash3_seed, size_t max_shingles_per_page, - size_t shingle_size, - FeatureExtractorClock* clock) + size_t shingle_size) : page_term_hashes_(page_term_hashes), page_word_hashes_(page_word_hashes), max_words_per_term_(max_words_per_term), murmurhash3_seed_(murmurhash3_seed), max_shingles_per_page_(max_shingles_per_page), shingle_size_(shingle_size), - clock_(clock) { + clock_(base::DefaultTickClock::GetInstance()) { Clear(); } PhishingTermFeatureExtractor::~PhishingTermFeatureExtractor() { // The RenderView should have called CancelPendingExtraction() before // we are destroyed. - CheckNoPendingExtraction(); + DCHECK(done_callback_.is_null()); + DCHECK(!state_.get()); } void PhishingTermFeatureExtractor::ExtractFeatures( @@ -113,7 +109,8 @@ void PhishingTermFeatureExtractor::ExtractFeatures( DoneCallback done_callback) { // The RenderView should have called CancelPendingExtraction() before // starting a new extraction, so DCHECK this. - CheckNoPendingExtraction(); + DCHECK(done_callback_.is_null()); + DCHECK(!state_.get()); // However, in an opt build, we will go ahead and clean up the pending // extraction so that we can start in a known state. CancelPendingExtraction(); @@ -122,7 +119,7 @@ void PhishingTermFeatureExtractor::ExtractFeatures( features_ = features; shingle_hashes_ = shingle_hashes, done_callback_ = std::move(done_callback); - state_.reset(new ExtractionState(*page_text_, clock_->Now())); + state_ = std::make_unique<ExtractionState>(*page_text_, clock_->NowTicks()); base::ThreadTaskRunnerHandle::Get()->PostTask( FROM_HERE, base::BindOnce(&PhishingTermFeatureExtractor::ExtractFeaturesWithTimeout, @@ -138,7 +135,7 @@ void PhishingTermFeatureExtractor::CancelPendingExtraction() { void PhishingTermFeatureExtractor::ExtractFeaturesWithTimeout() { DCHECK(state_.get()); ++state_->num_iterations; - base::TimeTicks current_chunk_start_time = clock_->Now(); + base::TimeTicks current_chunk_start_time = clock_->NowTicks(); if (!state_->iterator.get()) { // We failed to initialize the break iterator, so stop now. @@ -158,10 +155,9 @@ void PhishingTermFeatureExtractor::ExtractFeaturesWithTimeout() { if (num_words >= kClockCheckGranularity) { num_words = 0; - base::TimeTicks now = clock_->Now(); + base::TimeTicks now = clock_->NowTicks(); if (now - state_->start_time >= base::TimeDelta::FromMilliseconds(kMaxTotalTimeMs)) { - DLOG(ERROR) << "Feature extraction took too long, giving up"; // We expect this to happen infrequently, so record when it does. UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.TermFeatureTimeout", 1); RunCallback(false); @@ -261,15 +257,6 @@ void PhishingTermFeatureExtractor::HandleWord( } } -void PhishingTermFeatureExtractor::CheckNoPendingExtraction() { - DCHECK(done_callback_.is_null()); - DCHECK(!state_.get()); - if (!done_callback_.is_null() || state_.get()) { - LOG(ERROR) << "Extraction in progress, missing call to " - << "CancelPendingExtraction"; - } -} - void PhishingTermFeatureExtractor::RunCallback(bool success) { // Record some timing stats that we can use to evaluate feature extraction // performance. These include both successful and failed extractions. @@ -277,7 +264,7 @@ void PhishingTermFeatureExtractor::RunCallback(bool success) { UMA_HISTOGRAM_COUNTS_1M("SBClientPhishing.TermFeatureIterations", state_->num_iterations); UMA_HISTOGRAM_TIMES("SBClientPhishing.TermFeatureTotalTime", - clock_->Now() - state_->start_time); + clock_->NowTicks() - state_->start_time); DCHECK(!done_callback_.is_null()); std::move(done_callback_).Run(success); @@ -285,11 +272,11 @@ void PhishingTermFeatureExtractor::RunCallback(bool success) { } void PhishingTermFeatureExtractor::Clear() { - page_text_ = NULL; - features_ = NULL; - shingle_hashes_ = NULL; + page_text_ = nullptr; + features_ = nullptr; + shingle_hashes_ = nullptr; done_callback_.Reset(); - state_.reset(NULL); + state_.reset(nullptr); } } // namespace safe_browsing diff --git a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h index d0cc48d7033..341f60704a2 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h +++ b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor.h @@ -29,9 +29,9 @@ #include "base/memory/weak_ptr.h" #include "base/strings/string16.h" #include "base/strings/string_piece.h" +#include "base/time/tick_clock.h" namespace safe_browsing { -class FeatureExtractorClock; class FeatureMap; class PhishingTermFeatureExtractor { @@ -63,8 +63,7 @@ class PhishingTermFeatureExtractor { size_t max_words_per_term, uint32_t murmurhash3_seed, size_t max_shingles_per_page, - size_t shingle_size, - FeatureExtractorClock* clock); + size_t shingle_size); ~PhishingTermFeatureExtractor(); // Begins extracting features from |page_text| into the given FeatureMap. @@ -90,6 +89,8 @@ class PhishingTermFeatureExtractor { // is unloaded or the PhishingTermFeatureExtractor is destroyed. void CancelPendingExtraction(); + void SetTickClockForTesting(const base::TickClock* clock) { clock_ = clock; } + private: struct ExtractionState; @@ -115,11 +116,6 @@ class PhishingTermFeatureExtractor { // Handles a single word in the page text. void HandleWord(const base::StringPiece16& word); - // Helper to verify that there is no pending feature extraction. Dies in - // debug builds if the state is not as expected. This is a no-op in release - // builds. - void CheckNoPendingExtraction(); - // Runs |done_callback_| and then clears all internal state. void RunCallback(bool success); @@ -149,7 +145,7 @@ class PhishingTermFeatureExtractor { const size_t shingle_size_; // Non-owned pointer to our clock. - FeatureExtractorClock* clock_; + const base::TickClock* clock_; // The output parameters from the most recent call to ExtractFeatures(). const base::string16* page_text_; // The caller keeps ownership of this. diff --git a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc index c1b38303cbd..53fc9a643fb 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_term_feature_extractor_unittest.cc @@ -24,7 +24,6 @@ #include "base/time/time.h" #include "build/build_config.h" #include "chrome/renderer/safe_browsing/features.h" -#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h" #include "chrome/renderer/safe_browsing/murmurhash3_util.h" #include "chrome/renderer/safe_browsing/test_utils.h" #include "crypto/sha2.h" @@ -33,11 +32,20 @@ using base::ASCIIToUTF16; using ::testing::Return; +using ::testing::StrictMock; static const uint32_t kMurmurHash3Seed = 2777808611U; namespace safe_browsing { +class MockTickClock : public base::TickClock { + public: + MockTickClock() = default; + ~MockTickClock() override = default; + + MOCK_CONST_METHOD0(NowTicks, base::TimeTicks()); +}; + class PhishingTermFeatureExtractorTest : public ::testing::Test { protected: void SetUp() override { @@ -80,14 +88,9 @@ class PhishingTermFeatureExtractorTest : public ::testing::Test { } void ResetExtractor(size_t max_shingles_per_page) { - extractor_.reset(new PhishingTermFeatureExtractor( - &term_hashes_, - &word_hashes_, - 3 /* max_words_per_term */, - kMurmurHash3Seed, - max_shingles_per_page, - 4 /* shingle_size */, - &clock_)); + extractor_ = std::make_unique<PhishingTermFeatureExtractor>( + &term_hashes_, &word_hashes_, 3 /* max_words_per_term */, + kMurmurHash3Seed, max_shingles_per_page, 4 /* shingle_size */); } // Runs the TermFeatureExtractor on |page_text|, waiting for the @@ -133,7 +136,6 @@ class PhishingTermFeatureExtractorTest : public ::testing::Test { base::test::SingleThreadTaskEnvironment task_environment_; std::unique_ptr<base::RunLoop> active_run_loop_; - MockFeatureExtractorClock clock_; std::unique_ptr<PhishingTermFeatureExtractor> extractor_; std::unordered_set<std::string> term_hashes_; std::unordered_set<uint32_t> word_hashes_; @@ -141,9 +143,6 @@ class PhishingTermFeatureExtractorTest : public ::testing::Test { }; TEST_F(PhishingTermFeatureExtractorTest, ExtractFeatures) { - // This test doesn't exercise the extraction timing. - EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); - base::string16 page_text = ASCIIToUTF16("blah"); FeatureMap expected_features; // initially empty std::set<uint32_t> expected_shingle_hashes; @@ -306,7 +305,8 @@ TEST_F(PhishingTermFeatureExtractorTest, Continuation) { // Note that this assumes kClockCheckGranularity = 5 and // kMaxTimePerChunkMs = 10. base::TimeTicks now = base::TimeTicks::Now(); - EXPECT_CALL(clock_, Now()) + StrictMock<MockTickClock> tick_clock; + EXPECT_CALL(tick_clock, NowTicks()) // Time check at the start of extraction. .WillOnce(Return(now)) // Time check at the start of the first chunk of work. @@ -328,6 +328,7 @@ TEST_F(PhishingTermFeatureExtractorTest, Continuation) { .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(28))) // A final check for the histograms. .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30))); + extractor_->SetTickClockForTesting(&tick_clock); FeatureMap expected_features; expected_features.AddBooleanFeature(features::kPageTerm + @@ -396,13 +397,13 @@ TEST_F(PhishingTermFeatureExtractorTest, Continuation) { ExpectFeatureMapsAreEqual(features, expected_features); EXPECT_THAT(expected_shingle_hashes, testing::ContainerEq(shingle_hashes)); // Make sure none of the mock expectations carry over to the next test. - ::testing::Mock::VerifyAndClearExpectations(&clock_); + ::testing::Mock::VerifyAndClearExpectations(&tick_clock); // Now repeat the test with the same text, but advance the clock faster so // that the extraction time exceeds the maximum total time for the feature // extractor. Extraction should fail. Note that this assumes // kMaxTotalTimeMs = 500. - EXPECT_CALL(clock_, Now()) + EXPECT_CALL(tick_clock, NowTicks()) // Time check at the start of extraction. .WillOnce(Return(now)) // Time check at the start of the first chunk of work. @@ -429,7 +430,8 @@ TEST_F(PhishingTermFeatureExtractorTest, PartialExtractionTest) { } base::TimeTicks now = base::TimeTicks::Now(); - EXPECT_CALL(clock_, Now()) + StrictMock<MockTickClock> tick_clock; + EXPECT_CALL(tick_clock, NowTicks()) // Time check at the start of extraction. .WillOnce(Return(now)) // Time check at the start of the first chunk of work. @@ -439,6 +441,7 @@ TEST_F(PhishingTermFeatureExtractorTest, PartialExtractionTest) { // Time check after the next 5 words. This should be greater than // kMaxTimePerChunkMs so that we stop and schedule extraction for later. .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(14))); + extractor_->SetTickClockForTesting(&tick_clock); FeatureMap features; std::set<uint32_t> shingle_hashes; @@ -454,7 +457,8 @@ TEST_F(PhishingTermFeatureExtractorTest, PartialExtractionTest) { shingle_hashes.clear(); // This part doesn't exercise the extraction timing. - EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now())); + EXPECT_CALL(tick_clock, NowTicks()) + .WillRepeatedly(Return(base::TimeTicks::Now())); // Now extract normally and make sure nothing breaks. EXPECT_TRUE(ExtractFeatures(page_text.get(), &features, &shingle_hashes)); diff --git a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc index 931c5203edd..403d1d060f7 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor.cc @@ -8,7 +8,6 @@ #include <string> #include <vector> -#include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "base/strings/string_split.h" #include "base/strings/string_util.h" @@ -44,8 +43,8 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url, host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); + // Check if TLD exists for host. if (registry_length == 0 || registry_length == std::string::npos) { - DVLOG(1) << "Could not find TLD for host: " << host; return false; } DCHECK_LT(registry_length, host.size()) << "Non-zero registry length, but " @@ -59,8 +58,8 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url, host.erase(tld_start - 1); std::vector<std::string> host_tokens = base::SplitString( host, ".", base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY); + // Check if domain exists for host. if (host_tokens.empty()) { - DVLOG(1) << "Could not find domain for host: " << host; return false; } if (!features->AddBooleanFeature(features::kUrlDomainToken + diff --git a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc index e5412a7bd4e..c87ab3daf88 100644 --- a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc +++ b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc @@ -6,6 +6,8 @@ #include <string> #include <vector> +#include "base/format_macros.h" +#include "base/strings/stringprintf.h" #include "chrome/renderer/safe_browsing/features.h" #include "chrome/renderer/safe_browsing/test_utils.h" #include "testing/gmock/include/gmock/gmock.h" @@ -25,10 +27,24 @@ class PhishingUrlFeatureExtractorTest : public ::testing::Test { PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens(full, tokens); } + + void FillFeatureMap(size_t count, FeatureMap* features) { + for (size_t i = 0; i < count; ++i) { + EXPECT_TRUE( + features->AddBooleanFeature(base::StringPrintf("Feature%" PRIuS, i))); + } + } }; TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) { std::string url = "http://123.0.0.1/mydocuments/a.file.html"; + FeatureMap features; + + // If feature map is already full, features cannot be extracted. + FillFeatureMap(FeatureMap::kMaxFeatureMapSize, &features); + ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); + features.Clear(); + FeatureMap expected_features; expected_features.AddBooleanFeature(features::kUrlHostIsIpAddress); expected_features.AddBooleanFeature(features::kUrlPathToken + @@ -38,9 +54,13 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) { expected_features.AddBooleanFeature(features::kUrlPathToken + std::string("html")); - FeatureMap features; ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); ExpectFeatureMapsAreEqual(features, expected_features); + // If feature map is already full, features cannot be extracted. + features.Clear(); + FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 1, &features); + ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); + features.Clear(); url = "http://www.www.cnn.co.uk/sports/sports/index.html?shouldnotappear"; expected_features.Clear(); @@ -61,6 +81,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) { features.Clear(); ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); ExpectFeatureMapsAreEqual(features, expected_features); + features.Clear(); + // If feature map is already full, features cannot be extracted. + FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 5, &features); + ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); url = "http://justadomain.com/"; expected_features.Clear(); @@ -72,6 +96,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) { features.Clear(); ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); ExpectFeatureMapsAreEqual(features, expected_features); + // If feature map is already full, features cannot be extracted. + features.Clear(); + FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 1, &features); + ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); url = "http://witharef.com/#abc"; expected_features.Clear(); @@ -96,6 +124,10 @@ TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) { features.Clear(); ASSERT_TRUE(extractor_.ExtractFeatures(GURL(url), &features)); ExpectFeatureMapsAreEqual(features, expected_features); + // If feature map is already full, features cannot be extracted. + features.Clear(); + FillFeatureMap(FeatureMap::kMaxFeatureMapSize - 2, &features); + ASSERT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); url = "http://unrecognized.tld/"; EXPECT_FALSE(extractor_.ExtractFeatures(GURL(url), &features)); diff --git a/chromium/chrome/renderer/safe_browsing/scorer.cc b/chromium/chrome/renderer/safe_browsing/scorer.cc index 8e37a934f9a..fc592db98c5 100644 --- a/chromium/chrome/renderer/safe_browsing/scorer.cc +++ b/chromium/chrome/renderer/safe_browsing/scorer.cc @@ -10,10 +10,10 @@ #include <unordered_map> #include <unordered_set> -#include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "base/strings/string_piece.h" #include "chrome/renderer/safe_browsing/features.h" +#include "components/safe_browsing/content/password_protection/visual_utils.h" #include "components/safe_browsing/core/proto/client_model.pb.h" namespace { @@ -57,14 +57,12 @@ Scorer::~Scorer() {} Scorer* Scorer::Create(const base::StringPiece& model_str) { std::unique_ptr<Scorer> scorer(new Scorer()); ClientSideModel& model = scorer->model_; + // Parse the phishing model. if (!model.ParseFromArray(model_str.data(), model_str.size())) { - DLOG(ERROR) << "Unable to parse phishing model. This Scorer object is " - << "invalid."; RecordScorerCreationStatus(SCORER_FAIL_MODEL_PARSE_ERROR); return NULL; } else if (!model.IsInitialized()) { - DLOG(ERROR) << "Unable to parse phishing model. The model is missing " - << "some required fields. Maybe the .proto file changed?"; + // The model may be missing some required fields. RecordScorerCreationStatus(SCORER_FAIL_MODEL_MISSING_FIELDS); return NULL; } @@ -86,6 +84,32 @@ double Scorer::ComputeScore(const FeatureMap& features) const { return LogOdds2Prob(logodds); } +bool Scorer::GetMatchingVisualTargets(const SkBitmap& bitmap, + ClientPhishingRequest* request) const { + bool has_match = false; + for (const VisualTarget& target : model_.vision_model().targets()) { + base::Optional<VisionMatchResult> result = + visual_utils::IsVisualMatch(bitmap, target); + if (result.has_value()) { + *request->add_vision_match() = result.value(); + has_match = true; + } + } + + if (model_.has_vision_model()) { + // Populate these fields for telementry purposes. They will be filtered in + // the browser process if they are not needed. + VisualFeatures::BlurredImage blurred_image; + if (visual_utils::GetBlurredImage(bitmap, &blurred_image)) { + request->set_screenshot_phash( + visual_utils::GetHashFromBlurredImage(blurred_image)); + request->set_phash_dimension_size(48); + } + } + + return has_match; +} + int Scorer::model_version() const { return model_.version(); } diff --git a/chromium/chrome/renderer/safe_browsing/scorer.h b/chromium/chrome/renderer/safe_browsing/scorer.h index 5fda83d4817..597b4cf047b 100644 --- a/chromium/chrome/renderer/safe_browsing/scorer.h +++ b/chromium/chrome/renderer/safe_browsing/scorer.h @@ -23,6 +23,7 @@ #include "base/macros.h" #include "base/strings/string_piece.h" #include "components/safe_browsing/core/proto/client_model.pb.h" +#include "third_party/skia/include/core/SkBitmap.h" namespace safe_browsing { class FeatureMap; @@ -41,6 +42,11 @@ class Scorer { // (range is inclusive on both ends). virtual double ComputeScore(const FeatureMap& features) const; + // This method matches the given |bitmap| against the visual model. It returns + // true if any visual target matches, and populates |request| appropriately. + virtual bool GetMatchingVisualTargets(const SkBitmap& bitmap, + ClientPhishingRequest* request) const; + // Returns the version number of the loaded client model. int model_version() const; diff --git a/chromium/chrome/renderer/safe_browsing/scorer_unittest.cc b/chromium/chrome/renderer/safe_browsing/scorer_unittest.cc index 9ea41fd1ccc..9083eca05a9 100644 --- a/chromium/chrome/renderer/safe_browsing/scorer_unittest.cc +++ b/chromium/chrome/renderer/safe_browsing/scorer_unittest.cc @@ -59,9 +59,39 @@ class PhishingScorerTest : public ::testing::Test { model_.set_murmur_hash_seed(12345U); model_.set_max_shingles_per_page(10); model_.set_shingle_size(3); + + // The first target hash is all 1-bits, except the first 8. + std::vector<unsigned char> target_hash; + target_hash.push_back('\x30'); + for (int i = 0; i < 288; i++) + target_hash.push_back('\xff'); + target_hash[1] = '\x00'; + VisualTarget* target1 = model_.mutable_vision_model()->add_targets(); + target1->set_digest("target1"); + target1->set_hash(target_hash.data(), target_hash.size()); + target1->mutable_match_config()->add_match_rule()->set_hash_distance(8.0); + + // The second target hash is all 1-bits, except the second 8. + target_hash[1] = '\xff'; + target_hash[2] = '\x00'; + VisualTarget* target2 = model_.mutable_vision_model()->add_targets(); + target2->set_digest("target2"); + target2->set_hash(target_hash.data(), target_hash.size()); + target2->mutable_match_config()->add_match_rule()->set_hash_distance(8.0); + + // Allocate a bitmap for testing visual scoring + sk_sp<SkColorSpace> rec2020 = SkColorSpace::MakeRGB( + {2.22222f, 0.909672f, 0.0903276f, 0.222222f, 0.0812429f, 0, 0}, + SkNamedGamut::kRec2020); + SkImageInfo bitmap_info = + SkImageInfo::Make(1000, 1000, SkColorType::kRGBA_8888_SkColorType, + SkAlphaType::kUnpremul_SkAlphaType, rec2020); + + ASSERT_TRUE(bitmap_.tryAllocPixels(bitmap_info)); } ClientSideModel model_; + SkBitmap bitmap_; }; TEST_F(PhishingScorerTest, HasValidModel) { @@ -145,4 +175,48 @@ TEST_F(PhishingScorerTest, ComputeScore) { EXPECT_TRUE(features.AddBooleanFeature("feature2")); EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features)); } + +TEST_F(PhishingScorerTest, GetMatchingVisualTargetsMatchOne) { + std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); + + // Make the whole image white + for (int x = 0; x < 1000; x++) + for (int y = 0; y < 1000; y++) + *bitmap_.getAddr32(x, y) = 0xffffffff; + + // Make the first 164 pixels black. This will make the first 8 bits of the + // hash 0. + for (int x = 0; x < 164; x++) + *bitmap_.getAddr32(x, 0) = 0xff000000; + + ClientPhishingRequest request; + scorer->GetMatchingVisualTargets(bitmap_, &request); + ASSERT_EQ(request.vision_match_size(), 1); + EXPECT_EQ(request.vision_match(0).matched_target_digest(), "target1"); +} + +TEST_F(PhishingScorerTest, GetMatchingVisualTargetsMatchBoth) { + std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString())); + + // Make the whole image white + for (int x = 0; x < 1000; x++) + for (int y = 0; y < 1000; y++) + *bitmap_.getAddr32(x, y) = 0xffffffff; + + // Create an alternating black/white pattern to match both targets. The + // pattern is 84 black pixels, then 84 white, then 84 black, then 84 white. + // This causes the hash to start 0F0F, for a distance of 8 from both targets. + for (int x = 0; x < 84; x++) + *bitmap_.getAddr32(x, 0) = 0xff000000; + + for (int x = 168; x < 248; x++) + *bitmap_.getAddr32(x, 0) = 0xff000000; + + ClientPhishingRequest request; + scorer->GetMatchingVisualTargets(bitmap_, &request); + ASSERT_EQ(request.vision_match_size(), 2); + EXPECT_EQ(request.vision_match(0).matched_target_digest(), "target1"); + EXPECT_EQ(request.vision_match(1).matched_target_digest(), "target2"); +} + } // namespace safe_browsing |