21 files changed, 854 insertions, 519 deletions
diff --git a/chromium/content/browser/speech/DEPS b/chromium/content/browser/speech/DEPS
index d3ee893ebac..7c726080d23 100644
--- a/chromium/content/browser/speech/DEPS
+++ b/chromium/content/browser/speech/DEPS
@@ -1,3 +1,11 @@
 include_rules = [
+  "+components/speech",
   "+google_apis",  # Exception to general rule, see content/DEPS for details.
 ]
+
+specific_include_rules = {
+  "tts_controller_impl\.cc": [
+    # TtsControllerImpl uses GetLanguage(), which is not grd related.
+    "+ui/base/l10n/l10n_util.h",
+  ],
+}
diff --git a/chromium/content/browser/speech/proto/BUILD.gn b/chromium/content/browser/speech/proto/BUILD.gn
deleted file mode 100644
index dafd61f78ee..00000000000
--- a/chromium/content/browser/speech/proto/BUILD.gn
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import("//third_party/protobuf/proto_library.gni")
-
-proto_library("proto") {
-  sources = [ "google_streaming_api.proto" ]
-}
diff --git a/chromium/content/browser/speech/proto/google_streaming_api.proto b/chromium/content/browser/speech/proto/google_streaming_api.proto
deleted file mode 100644
index ce1b8d98a49..00000000000
--- a/chromium/content/browser/speech/proto/google_streaming_api.proto
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-syntax = "proto2";
-option optimize_for = LITE_RUNTIME;
-
-// TODO(hans): Commented out due to compilation errors.
-// option cc_api_version = 2;
-
-package content.proto;
-
-// SpeechRecognitionEvent is the only message type sent to client.
-//
-// The first SpeechRecognitionEvent is an empty (default) message to indicate
-// as early as possible that the stream connection has been established.
-message SpeechRecognitionEvent {
-  enum StatusCode {
-    // Note: in JavaScript API SpeechRecognitionError 0 is "OTHER" error.
-    STATUS_SUCCESS = 0;
-    STATUS_NO_SPEECH = 1;
-    STATUS_ABORTED = 2;
-    STATUS_AUDIO_CAPTURE = 3;
-    STATUS_NETWORK = 4;
-    STATUS_NOT_ALLOWED = 5;
-    STATUS_SERVICE_NOT_ALLOWED = 6;
-    STATUS_BAD_GRAMMAR = 7;
-    STATUS_LANGUAGE_NOT_SUPPORTED = 8;
-  }
-  optional StatusCode status = 1 [default = STATUS_SUCCESS];
-
-  // May contain zero or one final=true result (the newly settled portion).
-  // May also contain zero or more final=false results.
-  // (Note that this differs from JavaScript API resultHistory in that no more
-  // than one final=true result is returned, so client must accumulate
-  // resultHistory by concatenating the final=true results.)
-  repeated SpeechRecognitionResult result = 2;
-
-  enum EndpointerEventType {
-    START_OF_SPEECH = 0;
-    END_OF_SPEECH = 1;
-    END_OF_AUDIO = 2;  // End of audio stream has been reached.
-    // End of utterance indicates that no more speech segments are expected.
-    END_OF_UTTERANCE = 3;
-  }
-
-  optional EndpointerEventType endpoint = 4;
-};
-
-message SpeechRecognitionResult {
-  repeated SpeechRecognitionAlternative alternative = 1;
-
-  // True if this is the final time the speech service will return this
-  // particular SpeechRecognitionResult. If false, then this represents an
-  // interim result that could still be changed.
-  optional bool final = 2 [default = false];
-
-  // An estimate of the probability that the recognizer will not change its
-  // guess about this interim result.  Values range from 0.0 (completely
-  // unstable) to 1.0 (completely stable).  Note that this is not the same as
-  // "confidence", which estimate the probability that a recognition result
-  // is correct. This field is only provided for interim (final=false) results.
-  optional float stability = 3;
-};
-
-// Item in N-best list.
-message SpeechRecognitionAlternative {
-  // Spoken text.
-  optional string transcript = 1;
-
-  // The confidence estimate between 0.0 and 1.0.  A higher number means the
-  // system is more confident that the recognition is correct.
-  // This field is typically provided only for the top hypothesis and only for
-  // final results.
-  optional float confidence = 2;
-}
diff --git a/chromium/content/browser/speech/speech_recognition_browsertest.cc b/chromium/content/browser/speech/speech_recognition_browsertest.cc
index bb688ed2d2e..cf1dc5cbf2c 100644
--- a/chromium/content/browser/speech/speech_recognition_browsertest.cc
+++ b/chromium/content/browser/speech/speech_recognition_browsertest.cc
@@ -17,15 +17,14 @@
 #include "base/strings/string_util.h"
 #include "base/strings/utf_string_conversions.h"
 #include "base/sys_byteorder.h"
-#include "base/task/post_task.h"
 #include "base/threading/thread_task_runner_handle.h"
 #include "build/build_config.h"
-#include "content/browser/speech/proto/google_streaming_api.pb.h"
 #include "content/browser/speech/speech_recognition_engine.h"
 #include "content/browser/speech/speech_recognition_manager_impl.h"
 #include "content/browser/speech/speech_recognizer_impl.h"
 #include "content/public/browser/browser_task_traits.h"
 #include "content/public/browser/browser_thread.h"
+#include "content/public/browser/google_streaming_api.pb.h"
 #include "content/public/browser/notification_types.h"
 #include "content/public/browser/web_contents.h"
 #include "content/public/test/browser_test.h"
@@ -61,8 +60,8 @@ class MockAudioSystem : public media::AudioSystem {
 
     // Posting callback to allow current SpeechRecognizerImpl dispatching event
     // to complete before transitioning to the next FSM state.
-    base::PostTask(
-        FROM_HERE, {content::BrowserThread::IO},
+    content::GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE,
         base::BindOnce(std::move(on_params_cb),
                        media::AudioParameters::UnavailableDeviceParams()));
   }
@@ -230,8 +229,8 @@ class SpeechRecognitionBrowserTest : public ContentBrowserTest {
     // AudioCaptureSourcer::Stop() again.
     SpeechRecognizerImpl::SetAudioEnvironmentForTesting(nullptr, nullptr);
 
-    base::PostTask(FROM_HERE, {content::BrowserThread::UI},
-                   base::BindOnce(&SpeechRecognitionBrowserTest::SendResponse,
+    content::GetUIThreadTaskRunner({})->PostTask(
+        FROM_HERE, base::BindOnce(&SpeechRecognitionBrowserTest::SendResponse,
                                   base::Unretained(this)));
   }
 
diff --git a/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc b/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc
index 443516d9a70..155fc32d1ca 100644
--- a/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc
+++ b/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc
@@ -9,7 +9,6 @@
 #include "base/bind.h"
 #include "base/command_line.h"
 #include "base/lazy_instance.h"
-#include "base/task/post_task.h"
 #include "content/browser/browser_plugin/browser_plugin_guest.h"
 #include "content/browser/frame_host/frame_tree_node.h"
 #include "content/browser/frame_host/render_frame_host_manager.h"
@@ -62,8 +61,8 @@ void SpeechRecognitionDispatcherHost::Start(
     blink::mojom::StartSpeechRecognitionRequestParamsPtr params) {
   DCHECK_CURRENTLY_ON(BrowserThread::IO);
 
-  base::PostTask(
-      FROM_HERE, {BrowserThread::UI},
+  GetUIThreadTaskRunner({})->PostTask(
+      FROM_HERE,
       base::BindOnce(&SpeechRecognitionDispatcherHost::StartRequestOnUI,
                      AsWeakPtr(), render_process_id_, render_frame_id_,
                      std::move(params)));
@@ -89,6 +88,14 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI(
   WebContentsImpl* web_contents =
       static_cast<WebContentsImpl*>(WebContents::FromRenderFrameHost(rfh));
 
+  // Disable BackForwardCache when using the SpeechRecognition feature, because
+  // currently we do not handle speech recognition after placing the page in
+  // BackForwardCache.
+  // TODO(sreejakshetty): Make SpeechRecognition compatible with
+  // BackForwardCache.
+  rfh->OnSchedulerTrackedFeatureUsed(
+      blink::scheduler::WebSchedulerTrackedFeature::kSpeechRecognizer);
+
   // If the speech API request was from an inner WebContents or a guest, save
   // the context of the outer WebContents or the embedder since we will use it
   // to decide permission.
@@ -126,8 +133,8 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI(
   StoragePartition* storage_partition = BrowserContext::GetStoragePartition(
       browser_context, web_contents->GetSiteInstance());
 
-  base::PostTask(
-      FROM_HERE, {BrowserThread::IO},
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE,
       base::BindOnce(
           &SpeechRecognitionDispatcherHost::StartSessionOnIO,
           speech_recognition_dispatcher_host, std::move(params),
diff --git a/chromium/content/browser/speech/speech_recognition_engine.cc b/chromium/content/browser/speech/speech_recognition_engine.cc
index e55a430e581..c7af8469cee 100644
--- a/chromium/content/browser/speech/speech_recognition_engine.cc
+++ b/chromium/content/browser/speech/speech_recognition_engine.cc
@@ -15,7 +15,7 @@
 #include "base/strings/utf_string_conversions.h"
 #include "base/time/time.h"
 #include "content/browser/speech/audio_buffer.h"
-#include "content/browser/speech/proto/google_streaming_api.pb.h"
+#include "content/public/browser/google_streaming_api.pb.h"
 #include "google_apis/google_api_keys.h"
 #include "mojo/public/c/system/types.h"
 #include "mojo/public/cpp/bindings/receiver_set.h"
@@ -23,8 +23,6 @@
 #include "net/base/load_flags.h"
 #include "net/traffic_annotation/network_traffic_annotation.h"
 #include "services/network/public/cpp/shared_url_loader_factory.h"
-#include "services/network/public/cpp/simple_url_loader.h"
-#include "services/network/public/mojom/chunked_data_pipe_getter.mojom.h"
 #include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
 #include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
 
@@ -42,9 +40,6 @@ const char* web_service_base_url_for_tests = nullptr;
 // This matches the maximum maxAlternatives value supported by the server.
 const uint32_t kMaxMaxAlternatives = 30;
 
-// Maximum amount of data written per Mojo write.
-const uint32_t kMaxUploadWrite = 128 * 1024;
-
 // TODO(hans): Remove this and other logging when we don't need it anymore.
 void DumpResponse(const std::string& response) {
   DVLOG(1) << "------------";
@@ -81,198 +76,6 @@ const uint32_t kDefaultMaxHypotheses = 1;
 
 }  // namespace
 
-// Streams sound data up to the server.
-class SpeechRecognitionEngine::UpstreamLoader
-    : public network::mojom::ChunkedDataPipeGetter {
- public:
-  UpstreamLoader(std::unique_ptr<network::ResourceRequest> resource_request,
-                 net::NetworkTrafficAnnotationTag upstream_traffic_annotation,
-                 network::mojom::URLLoaderFactory* url_loader_factory,
-                 SpeechRecognitionEngine* speech_recognition_engine)
-      : speech_recognition_engine_(speech_recognition_engine) {
-    // Attach a chunked upload body.
-    mojo::PendingRemote<network::mojom::ChunkedDataPipeGetter> data_remote;
-    receiver_set_.Add(this, data_remote.InitWithNewPipeAndPassReceiver());
-    resource_request->request_body = new network::ResourceRequestBody();
-    resource_request->request_body->SetToChunkedDataPipe(
-        std::move(data_remote));
-    simple_url_loader_ = network::SimpleURLLoader::Create(
-        std::move(resource_request), upstream_traffic_annotation);
-    simple_url_loader_->DownloadToStringOfUnboundedSizeUntilCrashAndDie(
-        url_loader_factory,
-        base::BindOnce(&UpstreamLoader::OnComplete, base::Unretained(this)));
-  }
-
-  ~UpstreamLoader() override = default;
-
-  void OnComplete(std::unique_ptr<std::string> response_body) {
-    int response_code = -1;
-    if (simple_url_loader_->ResponseInfo() &&
-        simple_url_loader_->ResponseInfo()->headers) {
-      response_code =
-          simple_url_loader_->ResponseInfo()->headers->response_code();
-    }
-    speech_recognition_engine_->OnUpstreamDataComplete(response_body != nullptr,
-                                                       response_code);
-  }
-
-  void AppendChunkToUpload(const std::string& data, bool is_last_chunk) {
-    DCHECK(!has_last_chunk_);
-
-    upload_body_ += data;
-    if (is_last_chunk) {
-      // Send size before the rest of the body. While it doesn't matter much, if
-      // the other side receives the size before the last chunk, which Mojo does
-      // not gaurantee, some protocols can merge the data and the last chunk
-      // itself into a single frame.
-      has_last_chunk_ = is_last_chunk;
-      if (get_size_callback_)
-        std::move(get_size_callback_).Run(net::OK, upload_body_.size());
-    }
-
-    SendData();
-  }
-
- private:
-  void OnUploadPipeWriteable(MojoResult unused) { SendData(); }
-
-  // Attempts to send more of the upload body, if more data is available, and
-  // |upload_pipe_| is valid.
-  void SendData() {
-    DCHECK_LE(upload_position_, upload_body_.size());
-
-    if (!upload_pipe_.is_valid())
-      return;
-
-    // Nothing more to write yet, or done writing everything.
-    if (upload_position_ == upload_body_.size())
-      return;
-
-    // Since kMaxUploadWrite is a uint32_t, no overflow occurs in this downcast.
-    uint32_t write_bytes = std::min(upload_body_.length() - upload_position_,
-                                    static_cast<size_t>(kMaxUploadWrite));
-    MojoResult result =
-        upload_pipe_->WriteData(upload_body_.data() + upload_position_,
-                                &write_bytes, MOJO_WRITE_DATA_FLAG_NONE);
-
-    // Wait for the pipe to have more capacity available, if needed.
-    if (result == MOJO_RESULT_SHOULD_WAIT) {
-      upload_pipe_watcher_->ArmOrNotify();
-      return;
-    }
-
-    // Do nothing on pipe closure - depend on the SimpleURLLoader to notice the
-    // other pipes being closed on error. Can reach this point if there's a
-    // retry, for instance, so cannot draw any conclusions here.
-    if (result != MOJO_RESULT_OK)
-      return;
-
-    upload_position_ += write_bytes;
-    // If more data is available, arm the watcher again. Don't write again in a
-    // loop, even if WriteData would allow it, to avoid blocking the current
-    // thread.
-    if (upload_position_ < upload_body_.size())
-      upload_pipe_watcher_->ArmOrNotify();
-  }
-
-  // mojom::ChunkedDataPipeGetter implementation:
-
-  void GetSize(GetSizeCallback get_size_callback) override {
-    if (has_last_chunk_) {
-      std::move(get_size_callback).Run(net::OK, upload_body_.size());
-    } else {
-      get_size_callback_ = std::move(get_size_callback);
-    }
-  }
-
-  void StartReading(mojo::ScopedDataPipeProducerHandle pipe) override {
-    // Delete any existing pipe, if any.
-    upload_pipe_watcher_.reset();
-    upload_pipe_ = std::move(pipe);
-    upload_pipe_watcher_ = std::make_unique<mojo::SimpleWatcher>(
-        FROM_HERE, mojo::SimpleWatcher::ArmingPolicy::MANUAL);
-    upload_pipe_watcher_->Watch(
-        upload_pipe_.get(), MOJO_HANDLE_SIGNAL_WRITABLE,
-        base::BindRepeating(&UpstreamLoader::OnUploadPipeWriteable,
-                            base::Unretained(this)));
-    upload_position_ = 0;
-
-    // Will attempt to start sending the request body, if any data is available.
-    SendData();
-  }
-
-  // Partial upload body. Have to cache the entire thing in memory, in case have
-  // to replay it.
-  std::string upload_body_;
-  // Current position in |upload_body_|.  All bytes before this point have been
-  // written to |upload_pipe_|.
-  size_t upload_position_ = 0;
-  // Whether |upload_body_| is complete.
-  bool has_last_chunk_ = false;
-
-  // Current pipe being used to send the |upload_body_| to the URLLoader.
-  mojo::ScopedDataPipeProducerHandle upload_pipe_;
-  // Watches |upload_pipe_| for writeability.
-  std::unique_ptr<mojo::SimpleWatcher> upload_pipe_watcher_;
-
-  // If non-null, invoked once the size of the upload is known.
-  network::mojom::ChunkedDataPipeGetter::GetSizeCallback get_size_callback_;
-
-  SpeechRecognitionEngine* const speech_recognition_engine_;
-  std::unique_ptr<network::SimpleURLLoader> simple_url_loader_;
-  mojo::ReceiverSet<network::mojom::ChunkedDataPipeGetter> receiver_set_;
-
-  DISALLOW_COPY_AND_ASSIGN(UpstreamLoader);
-};
-
-// Streams response data from the server to the SpeechRecognitionEngine.
-class SpeechRecognitionEngine::DownstreamLoader
-    : public network::SimpleURLLoaderStreamConsumer {
- public:
-  DownstreamLoader(std::unique_ptr<network::ResourceRequest> resource_request,
-                   net::NetworkTrafficAnnotationTag upstream_traffic_annotation,
-                   network::mojom::URLLoaderFactory* url_loader_factory,
-                   SpeechRecognitionEngine* speech_recognition_engine)
-      : speech_recognition_engine_(speech_recognition_engine) {
-    simple_url_loader_ = network::SimpleURLLoader::Create(
-        std::move(resource_request), upstream_traffic_annotation);
-    simple_url_loader_->DownloadAsStream(url_loader_factory, this);
-  }
-
-  ~DownstreamLoader() override = default;
-
-  // SimpleURLLoaderStreamConsumer implementation:
-
-  void OnDataReceived(base::StringPiece string_piece,
-                      base::OnceClosure resume) override {
-    speech_recognition_engine_->OnDownstreamDataReceived(string_piece);
-    std::move(resume).Run();
-  }
-
-  void OnComplete(bool success) override {
-    int response_code = -1;
-    if (simple_url_loader_->ResponseInfo() &&
-        simple_url_loader_->ResponseInfo()->headers) {
-      response_code =
-          simple_url_loader_->ResponseInfo()->headers->response_code();
-    }
-
-    speech_recognition_engine_->OnDownstreamDataComplete(success,
-                                                         response_code);
-  }
-
-  void OnRetry(base::OnceClosure start_retry) override {
-    // Retries are not enabled for these requests.
-    NOTREACHED();
-  }
-
- private:
-  SpeechRecognitionEngine* const speech_recognition_engine_;
-  std::unique_ptr<network::SimpleURLLoader> simple_url_loader_;
-
-  DISALLOW_COPY_AND_ASSIGN(DownstreamLoader);
-};
-
 SpeechRecognitionEngine::Config::Config()
     : filter_profanities(false),
       continuous(true),
@@ -562,7 +365,7 @@ SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) {
   auto downstream_request = std::make_unique<network::ResourceRequest>();
   downstream_request->credentials_mode = network::mojom::CredentialsMode::kOmit;
   downstream_request->url = downstream_url;
-  downstream_loader_ = std::make_unique<DownstreamLoader>(
+  downstream_loader_ = std::make_unique<speech::DownstreamLoader>(
       std::move(downstream_request), downstream_traffic_annotation,
       shared_url_loader_factory_.get(), this);
 
@@ -667,7 +470,7 @@ SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) {
                                         encoder_->GetMimeType());
   }
 
-  upstream_loader_ = std::make_unique<UpstreamLoader>(
+  upstream_loader_ = std::make_unique<speech::UpstreamLoader>(
       std::move(upstream_request), upstream_traffic_annotation,
       shared_url_loader_factory_.get(), this);
 
diff --git a/chromium/content/browser/speech/speech_recognition_engine.h b/chromium/content/browser/speech/speech_recognition_engine.h
index 1f3501200e5..d1e99750312 100644
--- a/chromium/content/browser/speech/speech_recognition_engine.h
+++ b/chromium/content/browser/speech/speech_recognition_engine.h
@@ -14,6 +14,10 @@
 #include "base/memory/ref_counted.h"
 #include "base/sequence_checker.h"
 #include "base/strings/string_piece.h"
+#include "components/speech/downstream_loader.h"
+#include "components/speech/downstream_loader_client.h"
+#include "components/speech/upstream_loader.h"
+#include "components/speech/upstream_loader_client.h"
 #include "content/browser/speech/audio_encoder.h"
 #include "content/browser/speech/chunked_byte_buffer.h"
 #include "content/common/content_export.h"
@@ -59,7 +63,9 @@ struct SpeechRecognitionError;
 // EndRecognition. If a recognition was started, the caller can free the
 // SpeechRecognitionEngine only after calling EndRecognition.
 
-class CONTENT_EXPORT SpeechRecognitionEngine {
+class CONTENT_EXPORT SpeechRecognitionEngine
+    : public speech::UpstreamLoaderClient,
+      public speech::DownstreamLoaderClient {
  public:
   class Delegate {
    public:
@@ -104,7 +110,7 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
   SpeechRecognitionEngine(
       scoped_refptr<network::SharedURLLoaderFactory> shared_url_loader_factory,
       const std::string& accept_language);
-  ~SpeechRecognitionEngine();
+  ~SpeechRecognitionEngine() override;
 
   // Sets the URL requests are sent to for tests.
   static void set_web_service_base_url_for_tests(
@@ -119,8 +125,8 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
   int GetDesiredAudioChunkDurationMs() const;
 
  private:
-  class UpstreamLoader;
-  class DownstreamLoader;
+  friend class speech::UpstreamLoaderClient;
+  friend class speech::DownstreamLoader;
 
   Delegate* delegate_;
 
@@ -171,10 +177,12 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
     DISALLOW_COPY_AND_ASSIGN(FSMEventArgs);
   };
 
-  void OnUpstreamDataComplete(bool success, int response_code);
+  // speech::UpstreamLoaderClient
+  void OnUpstreamDataComplete(bool success, int response_code) override;
 
-  void OnDownstreamDataReceived(base::StringPiece new_response_data);
-  void OnDownstreamDataComplete(bool success, int response_code);
+  // speech::DownstreamLoaderClient
+  void OnDownstreamDataReceived(base::StringPiece new_response_data) override;
+  void OnDownstreamDataComplete(bool success, int response_code) override;
 
   // Entry point for pushing any new external event into the recognizer FSM.
   void DispatchEvent(const FSMEventArgs& event_args);
@@ -204,8 +212,8 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
   void UploadAudioChunk(const std::string& data, FrameType type, bool is_final);
 
   Config config_;
-  std::unique_ptr<UpstreamLoader> upstream_loader_;
-  std::unique_ptr<DownstreamLoader> downstream_loader_;
+  std::unique_ptr<speech::UpstreamLoader> upstream_loader_;
+  std::unique_ptr<speech::DownstreamLoader> downstream_loader_;
   scoped_refptr<network::SharedURLLoaderFactory> shared_url_loader_factory_;
   const std::string accept_language_;
   std::unique_ptr<AudioEncoder> encoder_;
diff --git a/chromium/content/browser/speech/speech_recognition_engine_unittest.cc b/chromium/content/browser/speech/speech_recognition_engine_unittest.cc
index 1312af4f6f6..a68c4165ff5 100644
--- a/chromium/content/browser/speech/speech_recognition_engine_unittest.cc
+++ b/chromium/content/browser/speech/speech_recognition_engine_unittest.cc
@@ -17,7 +17,7 @@
 #include "base/sys_byteorder.h"
 #include "base/test/task_environment.h"
 #include "content/browser/speech/audio_buffer.h"
-#include "content/browser/speech/proto/google_streaming_api.pb.h"
+#include "content/public/browser/google_streaming_api.pb.h"
 #include "mojo/public/cpp/bindings/remote.h"
 #include "net/base/net_errors.h"
 #include "net/http/http_response_headers.h"
diff --git a/chromium/content/browser/speech/speech_recognition_manager_impl.cc b/chromium/content/browser/speech/speech_recognition_manager_impl.cc
index 9a55a7c6e80..4220ef86ec3 100644
--- a/chromium/content/browser/speech/speech_recognition_manager_impl.cc
+++ b/chromium/content/browser/speech/speech_recognition_manager_impl.cc
@@ -15,7 +15,6 @@
 #include "base/memory/ref_counted_delete_on_sequence.h"
 #include "base/sequenced_task_runner.h"
 #include "base/single_thread_task_runner.h"
-#include "base/task/post_task.h"
 #include "base/threading/thread_task_runner_handle.h"
 #include "build/build_config.h"
 #include "content/browser/browser_main_loop.h"
@@ -196,10 +195,9 @@ void SpeechRecognitionManagerImpl::FrameDeletionObserver::ContentsObserver::
     RenderFrameDeleted(RenderFrameHost* render_frame_host) {
   auto iters = observed_frames_.equal_range(render_frame_host);
   for (auto it = iters.first; it != iters.second; ++it) {
-    base::CreateSingleThreadTaskRunner({BrowserThread::IO})
-        ->PostTask(FROM_HERE,
-                   base::BindOnce(parent_observer_->frame_deleted_callback_,
-                                  it->second));
+    GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE,
+        base::BindOnce(parent_observer_->frame_deleted_callback_, it->second));
   }
 
   observed_frames_.erase(iters.first, iters.second);
@@ -303,14 +301,13 @@ int SpeechRecognitionManagerImpl::CreateSession(
 
   // The deletion observer is owned by this class, so it's safe to use
   // Unretained.
-  base::CreateSingleThreadTaskRunner({BrowserThread::UI})
-      ->PostTask(
-          FROM_HERE,
-          base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver::
-                             CreateObserverForSession,
-                         base::Unretained(frame_deletion_observer_.get()),
-                         config.initial_context.render_process_id,
-                         config.initial_context.render_frame_id, session_id));
+  GetUIThreadTaskRunner({})->PostTask(
+      FROM_HERE,
+      base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver::
+                         CreateObserverForSession,
+                     base::Unretained(frame_deletion_observer_.get()),
+                     config.initial_context.render_process_id,
+                     config.initial_context.render_frame_id, session_id));
 
   return session_id;
 }
@@ -413,15 +410,14 @@ void SpeechRecognitionManagerImpl::AbortSession(int session_id) {
 
   // The deletion observer is owned by this class, so it's safe to use
   // Unretained.
-  base::CreateSingleThreadTaskRunner({BrowserThread::UI})
-      ->PostTask(
-          FROM_HERE,
-          base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver::
-                             RemoveObserverForSession,
-                         base::Unretained(frame_deletion_observer_.get()),
-                         iter->second->config.initial_context.render_process_id,
-                         iter->second->config.initial_context.render_frame_id,
-                         session_id));
+  GetUIThreadTaskRunner({})->PostTask(
+      FROM_HERE,
+      base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver::
+                         RemoveObserverForSession,
+                     base::Unretained(frame_deletion_observer_.get()),
+                     iter->second->config.initial_context.render_process_id,
+                     iter->second->config.initial_context.render_frame_id,
+                     session_id));
 
   AbortSessionImpl(session_id);
 }
diff --git a/chromium/content/browser/speech/speech_recognizer.h b/chromium/content/browser/speech/speech_recognizer.h
index 64c896518a2..7a5f5204eee 100644
--- a/chromium/content/browser/speech/speech_recognizer.h
+++ b/chromium/content/browser/speech/speech_recognizer.h
@@ -5,7 +5,7 @@
 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
 
-#include "base/logging.h"
+#include "base/check.h"
 #include "base/macros.h"
 #include "base/memory/ref_counted.h"
 #include "content/common/content_export.h"
diff --git a/chromium/content/browser/speech/speech_recognizer_impl.cc b/chromium/content/browser/speech/speech_recognizer_impl.cc
index b03a554f559..c17b704614b 100644
--- a/chromium/content/browser/speech/speech_recognizer_impl.cc
+++ b/chromium/content/browser/speech/speech_recognizer_impl.cc
@@ -11,7 +11,6 @@
 #include "base/bind.h"
 #include "base/macros.h"
 #include "base/numerics/ranges.h"
-#include "base/task/post_task.h"
 #include "base/time/time.h"
 #include "build/build_config.h"
 #include "content/browser/browser_main_loop.h"
@@ -223,22 +222,22 @@ void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {
   DCHECK(!device_id.empty());
   device_id_ = device_id;
 
-  base::PostTask(FROM_HERE, {BrowserThread::IO},
-                 base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
                                 weak_ptr_factory_.GetWeakPtr(),
                                 FSMEventArgs(EVENT_PREPARE)));
 }
 
 void SpeechRecognizerImpl::AbortRecognition() {
-  base::PostTask(FROM_HERE, {BrowserThread::IO},
-                 base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
                                 weak_ptr_factory_.GetWeakPtr(),
                                 FSMEventArgs(EVENT_ABORT)));
 }
 
 void SpeechRecognizerImpl::StopAudioCapture() {
-  base::PostTask(FROM_HERE, {BrowserThread::IO},
-                 base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
                                 weak_ptr_factory_.GetWeakPtr(),
                                 FSMEventArgs(EVENT_STOP_CAPTURE)));
 }
@@ -278,15 +277,15 @@ void SpeechRecognizerImpl::Capture(const AudioBus* data,
   // Convert audio from native format to fixed format used by WebSpeech.
   FSMEventArgs event_args(EVENT_AUDIO_DATA);
   event_args.audio_data = audio_converter_->Convert(data);
-  base::PostTask(FROM_HERE, {BrowserThread::IO},
-                 base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
                                 weak_ptr_factory_.GetWeakPtr(), event_args));
   // See http://crbug.com/506051 regarding why one extra convert call can
   // sometimes be required. It should be a rare case.
   if (!audio_converter_->data_was_converted()) {
     event_args.audio_data = audio_converter_->Convert(data);
-    base::PostTask(FROM_HERE, {BrowserThread::IO},
-                   base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+    GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
                                   weak_ptr_factory_.GetWeakPtr(), event_args));
   }
   // Something is seriously wrong here and we are most likely missing some
@@ -296,8 +295,8 @@ void SpeechRecognizerImpl::Capture(const AudioBus* data,
 
 void SpeechRecognizerImpl::OnCaptureError(const std::string& message) {
   FSMEventArgs event_args(EVENT_AUDIO_ERROR);
-  base::PostTask(FROM_HERE, {BrowserThread::IO},
-                 base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
                                 weak_ptr_factory_.GetWeakPtr(), event_args));
 }
 
@@ -305,8 +304,8 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineResults(
     const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
   FSMEventArgs event_args(EVENT_ENGINE_RESULT);
   event_args.engine_results = mojo::Clone(results);
-  base::PostTask(FROM_HERE, {BrowserThread::IO},
-                 base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
                                 weak_ptr_factory_.GetWeakPtr(), event_args));
 }
 
@@ -319,8 +318,8 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
     const blink::mojom::SpeechRecognitionError& error) {
   FSMEventArgs event_args(EVENT_ENGINE_ERROR);
   event_args.engine_error = error;
-  base::PostTask(FROM_HERE, {BrowserThread::IO},
-                 base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
                                 weak_ptr_factory_.GetWeakPtr(), event_args));
 }
 
@@ -880,6 +879,7 @@ void SpeechRecognizerImpl::CreateAudioCapturerSource() {
       stream_factory.InitWithNewPipeAndPassReceiver());
   audio_capturer_source_ = audio::CreateInputDevice(
       std::move(stream_factory), device_id_,
+      audio::DeadStreamDetection::kEnabled,
       MediaInternals::GetInstance()->CreateMojoAudioLog(
           media::AudioLogFactory::AUDIO_INPUT_CONTROLLER,
           0 /* component_id */));
diff --git a/chromium/content/browser/speech/speech_recognizer_impl_android.cc b/chromium/content/browser/speech/speech_recognizer_impl_android.cc
index c72dd8464c2..49ff8496d40 100644
--- a/chromium/content/browser/speech/speech_recognizer_impl_android.cc
+++ b/chromium/content/browser/speech/speech_recognizer_impl_android.cc
@@ -12,7 +12,6 @@
 #include "base/android/scoped_java_ref.h"
 #include "base/bind.h"
 #include "base/strings/utf_string_conversions.h"
-#include "base/task/post_task.h"
 #include "content/public/android/content_jni_headers/SpeechRecognitionImpl_jni.h"
 #include "content/public/browser/browser_task_traits.h"
 #include "content/public/browser/browser_thread.h"
@@ -42,14 +41,14 @@ void SpeechRecognizerImplAndroid::StartRecognition(
     const std::string& device_id) {
   DCHECK_CURRENTLY_ON(BrowserThread::IO);
   // TODO(xians): Open the correct device for speech on Android.
-  base::PostTask(
-      FROM_HERE, {BrowserThread::IO},
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE,
       base::BindOnce(&SpeechRecognitionEventListener::OnRecognitionStart,
                      base::Unretained(listener()), session_id()));
   SpeechRecognitionSessionConfig config =
       SpeechRecognitionManager::GetInstance()->GetSessionConfig(session_id());
-  base::PostTask(
-      FROM_HERE, {BrowserThread::UI},
+  GetUIThreadTaskRunner({})->PostTask(
+      FROM_HERE,
       base::BindOnce(
           &content::SpeechRecognizerImplAndroid::StartRecognitionOnUIThread,
           this, config.language, config.continuous, config.interim_results));
@@ -71,8 +70,8 @@ void SpeechRecognizerImplAndroid::StartRecognitionOnUIThread(
 void SpeechRecognizerImplAndroid::AbortRecognition() {
   if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
     state_ = STATE_IDLE;
-    base::PostTask(
-        FROM_HERE, {BrowserThread::UI},
+    GetUIThreadTaskRunner({})->PostTask(
+        FROM_HERE,
         base::BindOnce(&content::SpeechRecognizerImplAndroid::AbortRecognition,
                        this));
     return;
@@ -85,8 +84,8 @@ void SpeechRecognizerImplAndroid::AbortRecognition() {
 
 void SpeechRecognizerImplAndroid::StopAudioCapture() {
   if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
-    base::PostTask(
-        FROM_HERE, {BrowserThread::UI},
+    GetUIThreadTaskRunner({})->PostTask(
+        FROM_HERE,
         base::BindOnce(&content::SpeechRecognizerImplAndroid::StopAudioCapture,
                        this));
     return;
@@ -111,8 +110,8 @@ void SpeechRecognizerImplAndroid::OnAudioStart(
     JNIEnv* env,
     const JavaParamRef<jobject>& obj) {
   if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
-    base::PostTask(FROM_HERE, {BrowserThread::IO},
-                   base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioStart,
+    GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioStart,
                                   this, nullptr, nullptr));
     return;
   }
@@ -125,8 +124,8 @@ void SpeechRecognizerImplAndroid::OnSoundStart(
     JNIEnv* env,
     const JavaParamRef<jobject>& obj) {
   if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
-    base::PostTask(FROM_HERE, {BrowserThread::IO},
-                   base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundStart,
+    GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundStart,
                                   this, nullptr, nullptr));
     return;
   }
@@ -137,8 +136,8 @@ void SpeechRecognizerImplAndroid::OnSoundStart(
 void SpeechRecognizerImplAndroid::OnSoundEnd(JNIEnv* env,
                                              const JavaParamRef<jobject>& obj) {
   if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
-    base::PostTask(FROM_HERE, {BrowserThread::IO},
-                   base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundEnd,
+    GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundEnd,
                                   this, nullptr, nullptr));
     return;
   }
@@ -149,8 +148,8 @@ void SpeechRecognizerImplAndroid::OnSoundEnd(JNIEnv* env,
 void SpeechRecognizerImplAndroid::OnAudioEnd(JNIEnv* env,
                                              const JavaParamRef<jobject>& obj) {
   if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
-    base::PostTask(FROM_HERE, {BrowserThread::IO},
-                   base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioEnd,
+    GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioEnd,
                                   this, nullptr, nullptr));
     return;
   }
@@ -181,8 +180,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionResults(
         options[i], static_cast<double>(scores[i])));
   }
   result->is_provisional = provisional;
-  base::PostTask(
-      FROM_HERE, {BrowserThread::IO},
+  GetIOThreadTaskRunner({})->PostTask(
+      FROM_HERE,
       base::BindOnce(
           &SpeechRecognizerImplAndroid::OnRecognitionResultsOnIOThread, this,
           std::move(results)));
@@ -199,8 +198,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionError(
     const JavaParamRef<jobject>& obj,
     jint error) {
   if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
-    base::PostTask(
-        FROM_HERE, {BrowserThread::IO},
+    GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE,
         base::BindOnce(&SpeechRecognizerImplAndroid::OnRecognitionError, this,
                        nullptr, nullptr, error));
     return;
@@ -217,8 +216,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionEnd(
     JNIEnv* env,
     const JavaParamRef<jobject>& obj) {
   if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
-    base::PostTask(
-        FROM_HERE, {BrowserThread::IO},
+    GetIOThreadTaskRunner({})->PostTask(
+        FROM_HERE,
         base::BindOnce(&SpeechRecognizerImplAndroid::OnRecognitionEnd, this,
                        nullptr, nullptr));
     return;
diff --git a/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc b/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc
index 7645df8014d..a8369b26471 100644
--- a/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc
+++ b/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc
@@ -18,9 +18,9 @@
 #include "base/test/scoped_feature_list.h"
 #include "base/threading/thread.h"
 #include "base/threading/thread_task_runner_handle.h"
-#include "content/browser/speech/proto/google_streaming_api.pb.h"
 #include "content/browser/speech/speech_recognition_engine.h"
 #include "content/browser/speech/speech_recognizer_impl.h"
+#include "content/public/browser/google_streaming_api.pb.h"
 #include "content/public/browser/speech_recognition_event_listener.h"
 #include "content/public/common/content_features.h"
 #include "content/public/test/browser_task_environment.h"
diff --git a/chromium/content/browser/speech/speech_synthesis_impl.cc b/chromium/content/browser/speech/speech_synthesis_impl.cc
index 1ab5ad01656..e46103bcb66 100644
--- a/chromium/content/browser/speech/speech_synthesis_impl.cc
+++ b/chromium/content/browser/speech/speech_synthesis_impl.cc
@@ -4,6 +4,8 @@
 
 #include "content/browser/speech/speech_synthesis_impl.h"
 
+#include "content/browser/speech/tts_utterance_impl.h"
+
 namespace content {
 namespace {
 
@@ -85,9 +87,11 @@ void SendVoiceListToObserver(
 
 }  // namespace
 
-SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context)
-    : browser_context_(browser_context) {
+SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context,
+                                         WebContents* web_contents)
+    : browser_context_(browser_context), web_contents_(web_contents) {
   DCHECK(browser_context_);
+  DCHECK(web_contents_);
   TtsController::GetInstance()->AddVoicesChangedDelegate(this);
 }
 
@@ -120,8 +124,8 @@ void SpeechSynthesisImpl::AddVoiceListObserver(
 void SpeechSynthesisImpl::Speak(
     blink::mojom::SpeechSynthesisUtterancePtr utterance,
     mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client) {
-  std::unique_ptr<TtsUtterance> tts_utterance(
-      TtsUtterance::Create((browser_context_)));
+  std::unique_ptr<TtsUtterance> tts_utterance =
+      std::make_unique<TtsUtteranceImpl>(browser_context_, web_contents_);
   tts_utterance->SetText(utterance->text);
   tts_utterance->SetLang(utterance->lang);
   tts_utterance->SetVoiceName(utterance->voice);
diff --git a/chromium/content/browser/speech/speech_synthesis_impl.h b/chromium/content/browser/speech/speech_synthesis_impl.h
index 7db29e521cb..96cdacf46eb 100644
--- a/chromium/content/browser/speech/speech_synthesis_impl.h
+++ b/chromium/content/browser/speech/speech_synthesis_impl.h
@@ -12,6 +12,7 @@
 
 namespace content {
 class BrowserContext;
+class WebContents;
 
 // Back-end for the web speech synthesis API; dispatches speech requests to
 // content::TtsController and forwards voice lists and events back to the
@@ -19,7 +20,8 @@ class BrowserContext;
 class SpeechSynthesisImpl : public blink::mojom::SpeechSynthesis,
                             public VoicesChangedDelegate {
  public:
-  explicit SpeechSynthesisImpl(BrowserContext* browser_context);
+  SpeechSynthesisImpl(BrowserContext* browser_context,
+                      WebContents* web_contents);
   ~SpeechSynthesisImpl() override;
 
   SpeechSynthesisImpl(const SpeechSynthesisImpl&) = delete;
@@ -44,6 +46,8 @@ class SpeechSynthesisImpl : public blink::mojom::SpeechSynthesis,
 
  private:
   BrowserContext* browser_context_;
+  WebContents* web_contents_;
+
   mojo::ReceiverSet<blink::mojom::SpeechSynthesis> receiver_set_;
   mojo::RemoteSet<blink::mojom::SpeechSynthesisVoiceListObserver> observer_set_;
 };
diff --git a/chromium/content/browser/speech/tts_controller_impl.cc b/chromium/content/browser/speech/tts_controller_impl.cc
index 108a5a7d0a6..e34045ca4e1 100644
--- a/chromium/content/browser/speech/tts_controller_impl.cc
+++ b/chromium/content/browser/speech/tts_controller_impl.cc
@@ -6,6 +6,7 @@
 
 #include <stddef.h>
 
+#include <algorithm>
 #include <string>
 #include <vector>
 
@@ -16,20 +17,47 @@
 #include "base/metrics/user_metrics.h"
 #include "base/values.h"
 #include "build/build_config.h"
+#include "content/browser/speech/tts_utterance_impl.h"
 #include "content/public/browser/content_browser_client.h"
+#include "content/public/browser/visibility.h"
+#include "content/public/browser/web_contents.h"
 #include "content/public/common/content_client.h"
 #include "services/data_decoder/public/cpp/safe_xml_parser.h"
 #include "services/data_decoder/public/mojom/xml_parser.mojom.h"
 #include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
+#include "ui/base/l10n/l10n_util.h"
 
-namespace content {
+#if defined(OS_CHROMEOS)
+#include "content/public/browser/tts_controller_delegate.h"
+#endif
 
+namespace content {
+namespace {
 // A value to be used to indicate that there is no char index available.
 const int kInvalidCharIndex = -1;
 
 // A value to be used to indicate that there is no length available.
 const int kInvalidLength = -1;
 
+#if defined(OS_CHROMEOS)
+// True iff |id| is set and names |voice|; native voices pair with an empty id.
+bool VoiceIdMatches(
+    const base::Optional<TtsControllerDelegate::PreferredVoiceId>& id,
+    const content::VoiceData& voice) {
+  const bool identifiable = voice.native || !voice.engine_id.empty();
+  if (!id.has_value() || voice.name.empty() || !identifiable)
+    return false;
+  return id->name == voice.name &&
+         (voice.native ? id->id.empty() : id->id == voice.engine_id);
+}
+#endif  // defined(OS_CHROMEOS)
+
+TtsUtteranceImpl* AsUtteranceImpl(TtsUtterance* utterance) {
+  return static_cast<TtsUtteranceImpl*>(utterance);
+}
+
+}  // namespace
+
 //
 // VoiceData
 //
@@ -77,16 +105,12 @@ TtsControllerImpl* TtsControllerImpl::GetInstance() {
   return base::Singleton<TtsControllerImpl>::get();
 }
 
-TtsControllerImpl::TtsControllerImpl()
-    : delegate_(nullptr),
-      current_utterance_(nullptr),
-      paused_(false),
-      tts_platform_(nullptr) {}
+TtsControllerImpl::TtsControllerImpl() = default;
 
 TtsControllerImpl::~TtsControllerImpl() {
   if (current_utterance_) {
     current_utterance_->Finish();
-    current_utterance_.reset();
+    SetCurrentUtterance(nullptr);
   }
 
   // Clear any queued utterances too.
@@ -95,17 +119,22 @@ TtsControllerImpl::~TtsControllerImpl() {
 
 void TtsControllerImpl::SpeakOrEnqueue(
     std::unique_ptr<TtsUtterance> utterance) {
+  if (!ShouldSpeakUtterance(utterance.get())) {
+    utterance->Finish();
+    return;
+  }
+
   // If we're paused and we get an utterance that can't be queued,
   // flush the queue but stay in the paused state.
   if (paused_ && !utterance->GetCanEnqueue()) {
-    utterance_deque_.emplace_back(std::move(utterance));
+    utterance_list_.emplace_back(std::move(utterance));
     Stop();
     paused_ = true;
     return;
   }
 
   if (paused_ || (IsSpeaking() && utterance->GetCanEnqueue())) {
-    utterance_deque_.emplace_back(std::move(utterance));
+    utterance_list_.emplace_back(std::move(utterance));
   } else {
     Stop();
     SpeakNow(std::move(utterance));
@@ -113,26 +142,30 @@ void TtsControllerImpl::SpeakOrEnqueue(
 }
 
 void TtsControllerImpl::Stop() {
-  StopInternal(GURL());
+  StopAndClearQueue(GURL());
 }
 
 void TtsControllerImpl::Stop(const GURL& source_url) {
-  StopInternal(source_url);
+  StopAndClearQueue(source_url);
+}
+
+void TtsControllerImpl::StopAndClearQueue(const GURL& source_url) {
+  if (StopCurrentUtteranceIfMatches(source_url))
+    ClearUtteranceQueue(true);  // Send events.
 }
 
-void TtsControllerImpl::StopInternal(const GURL& source_url) {
+bool TtsControllerImpl::StopCurrentUtteranceIfMatches(const GURL& source_url) {
   base::RecordAction(base::UserMetricsAction("TextToSpeech.Stop"));
 
   paused_ = false;
 
   if (!source_url.is_empty() && current_utterance_ &&
       current_utterance_->GetSrcUrl().GetOrigin() != source_url.GetOrigin())
-    return;
+    return false;
 
   if (current_utterance_ && !current_utterance_->GetEngineId().empty()) {
-    if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
-      GetTtsControllerDelegate()->GetTtsEngineDelegate()->Stop(
-          current_utterance_.get());
+    if (engine_delegate_)
+      engine_delegate_->Stop(current_utterance_.get());
   } else {
     GetTtsPlatform()->ClearError();
     GetTtsPlatform()->StopSpeaking();
@@ -142,7 +175,7 @@ void TtsControllerImpl::StopInternal(const GURL& source_url) {
     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
                                    kInvalidLength, std::string());
   FinishCurrentUtterance();
-  ClearUtteranceQueue(true);  // Send events.
+  return true;
 }
 
 void TtsControllerImpl::Pause() {
@@ -150,9 +183,8 @@ void TtsControllerImpl::Pause() {
 
   paused_ = true;
   if (current_utterance_ && !current_utterance_->GetEngineId().empty()) {
-    if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
-      GetTtsControllerDelegate()->GetTtsEngineDelegate()->Pause(
-          current_utterance_.get());
+    if (engine_delegate_)
+      engine_delegate_->Pause(current_utterance_.get());
   } else if (current_utterance_) {
     GetTtsPlatform()->ClearError();
     GetTtsPlatform()->Pause();
@@ -164,9 +196,8 @@ void TtsControllerImpl::Resume() {
 
   paused_ = false;
   if (current_utterance_ && !current_utterance_->GetEngineId().empty()) {
-    if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
-      GetTtsControllerDelegate()->GetTtsEngineDelegate()->Resume(
-          current_utterance_.get());
+    if (engine_delegate_)
+      engine_delegate_->Resume(current_utterance_.get());
   } else if (current_utterance_) {
     GetTtsPlatform()->ClearError();
     GetTtsPlatform()->Resume();
@@ -245,11 +276,8 @@ void TtsControllerImpl::GetVoices(BrowserContext* browser_context,
       tts_platform->GetVoices(out_voices);
   }
 
-  if (browser_context) {
-    TtsControllerDelegate* delegate = GetTtsControllerDelegate();
-    if (delegate && delegate->GetTtsEngineDelegate())
-      delegate->GetTtsEngineDelegate()->GetVoices(browser_context, out_voices);
-  }
+  if (browser_context && engine_delegate_)
+    engine_delegate_->GetVoices(browser_context, out_voices);
 }
 
 bool TtsControllerImpl::IsSpeaking() {
@@ -276,22 +304,21 @@ void TtsControllerImpl::RemoveVoicesChangedDelegate(
 void TtsControllerImpl::RemoveUtteranceEventDelegate(
     UtteranceEventDelegate* delegate) {
   // First clear any pending utterances with this delegate.
-  std::deque<std::unique_ptr<TtsUtterance>> old_deque;
-  utterance_deque_.swap(old_deque);
-  while (!old_deque.empty()) {
-    std::unique_ptr<TtsUtterance> utterance = std::move(old_deque.front());
-    old_deque.pop_front();
+  std::list<std::unique_ptr<TtsUtterance>> old_list;
+  utterance_list_.swap(old_list);
+  while (!old_list.empty()) {
+    std::unique_ptr<TtsUtterance> utterance = std::move(old_list.front());
+    old_list.pop_front();
     if (utterance->GetEventDelegate() != delegate)
-      utterance_deque_.emplace_back(std::move(utterance));
+      utterance_list_.emplace_back(std::move(utterance));
   }
 
   if (current_utterance_ &&
       current_utterance_->GetEventDelegate() == delegate) {
     current_utterance_->SetEventDelegate(nullptr);
     if (!current_utterance_->GetEngineId().empty()) {
-      if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
-        GetTtsControllerDelegate()->GetTtsEngineDelegate()->Stop(
-            current_utterance_.get());
+      if (engine_delegate_)
+        engine_delegate_->Stop(current_utterance_.get());
     } else {
       GetTtsPlatform()->ClearError();
       GetTtsPlatform()->StopSpeaking();
@@ -304,17 +331,11 @@ void TtsControllerImpl::RemoveUtteranceEventDelegate(
 }
 
 void TtsControllerImpl::SetTtsEngineDelegate(TtsEngineDelegate* delegate) {
-  if (!GetTtsControllerDelegate())
-    return;
-
-  GetTtsControllerDelegate()->SetTtsEngineDelegate(delegate);
+  engine_delegate_ = delegate;
 }
 
 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
-  if (!GetTtsControllerDelegate())
-    return nullptr;
-
-  return GetTtsControllerDelegate()->GetTtsEngineDelegate();
+  return engine_delegate_;
 }
 
 void TtsControllerImpl::OnBrowserContextDestroyed(
@@ -322,7 +343,7 @@ void TtsControllerImpl::OnBrowserContextDestroyed(
   bool did_clear_utterances = false;
 
   // First clear the BrowserContext from any utterances.
-  for (std::unique_ptr<TtsUtterance>& utterance : utterance_deque_) {
+  for (std::unique_ptr<TtsUtterance>& utterance : utterance_list_) {
     if (utterance->GetBrowserContext() == browser_context) {
       utterance->ClearBrowserContext();
       did_clear_utterances = true;
@@ -342,7 +363,7 @@ void TtsControllerImpl::OnBrowserContextDestroyed(
   // safe to use base::Unretained because this is a singleton.
   if (did_clear_utterances) {
     base::ThreadTaskRunnerHandle::Get()->PostTask(
-        FROM_HERE, base::BindOnce(&TtsControllerImpl::StopInternal,
+        FROM_HERE, base::BindOnce(&TtsControllerImpl::StopAndClearQueue,
                                   base::Unretained(this), GURL()));
   }
 }
@@ -352,7 +373,7 @@ void TtsControllerImpl::SetTtsPlatform(TtsPlatform* tts_platform) {
 }
 
 int TtsControllerImpl::QueueSize() {
-  return static_cast<int>(utterance_deque_.size());
+  return static_cast<int>(utterance_list_.size());
 }
 
 TtsPlatform* TtsControllerImpl::GetTtsPlatform() {
@@ -362,15 +383,6 @@ TtsPlatform* TtsControllerImpl::GetTtsPlatform() {
 }
 
 void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) {
-  // Note: this would only happen if a content embedder failed to provide
-  // their own TtsControllerDelegate. Chrome provides one, and Content Shell
-  // provides a mock one for web tests.
-  if (!GetTtsControllerDelegate()) {
-    utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
-                          kInvalidLength, std::string());
-    return;
-  }
-
   // Get all available voices and try to find a matching voice.
   std::vector<VoiceData> voices;
   GetVoices(utterance->GetBrowserContext(), &voices);
@@ -379,8 +391,7 @@ void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) {
   // to true because that might trigger deferred loading of native voices.
   // TODO(katie): Move most of the GetMatchingVoice logic into content/ and
   // use the TTS controller delegate to get chrome-specific info as needed.
-  int index =
-      GetTtsControllerDelegate()->GetMatchingVoice(utterance.get(), voices);
+  int index = GetMatchingVoice(utterance.get(), voices);
   VoiceData voice;
   if (index >= 0)
     voice = voices[index];
@@ -411,23 +422,22 @@ void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) {
   if (!voice.native) {
 #if !defined(OS_ANDROID)
     DCHECK(!voice.engine_id.empty());
-    current_utterance_ = std::move(utterance);
+    SetCurrentUtterance(std::move(utterance));
     current_utterance_->SetEngineId(voice.engine_id);
-    if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
-      GetTtsControllerDelegate()->GetTtsEngineDelegate()->Speak(
-          current_utterance_.get(), voice);
+    if (engine_delegate_)
+      engine_delegate_->Speak(current_utterance_.get(), voice);
     bool sends_end_event =
         voice.events.find(TTS_EVENT_END) != voice.events.end();
     if (!sends_end_event) {
       current_utterance_->Finish();
-      current_utterance_.reset();
+      SetCurrentUtterance(nullptr);
       SpeakNextUtterance();
     }
-#endif
+#endif  // !defined(OS_ANDROID)
   } else {
     // It's possible for certain platforms to send start events immediately
     // during |speak|.
-    current_utterance_ = std::move(utterance);
+    SetCurrentUtterance(std::move(utterance));
     GetTtsPlatform()->ClearError();
     GetTtsPlatform()->Speak(
         current_utterance_->GetId(), current_utterance_->GetText(),
@@ -451,20 +461,20 @@ void TtsControllerImpl::OnSpeakFinished(int utterance_id, bool success) {
   // the browser has built-in TTS that isn't loaded yet.
   if (GetTtsPlatform()->LoadBuiltInTtsEngine(
           current_utterance_->GetBrowserContext())) {
-    utterance_deque_.emplace_back(std::move(current_utterance_));
+    utterance_list_.emplace_back(std::move(current_utterance_));
     return;
   }
 
   current_utterance_->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
                                  kInvalidLength, GetTtsPlatform()->GetError());
-  current_utterance_.reset();
+  SetCurrentUtterance(nullptr);
 }
 
 void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
-  while (!utterance_deque_.empty()) {
+  while (!utterance_list_.empty()) {
     std::unique_ptr<TtsUtterance> utterance =
-        std::move(utterance_deque_.front());
-    utterance_deque_.pop_front();
+        std::move(utterance_list_.front());
+    utterance_list_.pop_front();
     if (send_events) {
       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
                             kInvalidLength, std::string());
@@ -479,7 +489,7 @@ void TtsControllerImpl::FinishCurrentUtterance() {
     if (!current_utterance_->IsFinished())
       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
                                      kInvalidLength, std::string());
-    current_utterance_.reset();
+    SetCurrentUtterance(nullptr);
   }
 }
 
@@ -489,11 +499,14 @@ void TtsControllerImpl::SpeakNextUtterance() {
 
   // Start speaking the next utterance in the queue.  Keep trying in case
   // one fails but there are still more in the queue to try.
-  while (!utterance_deque_.empty() && !current_utterance_) {
+  while (!utterance_list_.empty() && !current_utterance_) {
     std::unique_ptr<TtsUtterance> utterance =
-        std::move(utterance_deque_.front());
-    utterance_deque_.pop_front();
-    SpeakNow(std::move(utterance));
+        std::move(utterance_list_.front());
+    utterance_list_.pop_front();
+    if (ShouldSpeakUtterance(utterance.get()))
+      SpeakNow(std::move(utterance));
+    else
+      utterance->Finish();
   }
 }
 
@@ -502,8 +515,9 @@ void TtsControllerImpl::UpdateUtteranceDefaults(TtsUtterance* utterance) {
   double pitch = utterance->GetContinuousParameters().pitch;
   double volume = utterance->GetContinuousParameters().volume;
 #if defined(OS_CHROMEOS)
-  GetTtsControllerDelegate()->UpdateUtteranceDefaultsFromPrefs(utterance, &rate,
-                                                               &pitch, &volume);
+  if (GetTtsControllerDelegate())
+    GetTtsControllerDelegate()->UpdateUtteranceDefaultsFromPrefs(
+        utterance, &rate, &pitch, &volume);
 #else
   // Update pitch, rate and volume to defaults if not explicity set on
   // this utterance.
@@ -517,14 +531,8 @@ void TtsControllerImpl::UpdateUtteranceDefaults(TtsUtterance* utterance) {
   utterance->SetContinuousParameters(rate, pitch, volume);
 }
 
-TtsControllerDelegate* TtsControllerImpl::GetTtsControllerDelegate() {
-  if (delegate_)
-    return delegate_;
-  if (GetContentClient() && GetContentClient()->browser()) {
-    delegate_ = GetContentClient()->browser()->GetTtsControllerDelegate();
-    return delegate_;
-  }
-  return nullptr;
+void TtsControllerImpl::SetStopSpeakingWhenHidden(bool value) {
+  stop_speaking_when_hidden_ = value;
 }
 
 void TtsControllerImpl::StripSSML(
@@ -595,4 +603,175 @@ void TtsControllerImpl::PopulateParsedText(std::string* parsed_text,
   }
 }
 
+int TtsControllerImpl::GetMatchingVoice(TtsUtterance* utterance,
+                                        const std::vector<VoiceData>& voices) {
+  const std::string app_lang =
+      GetContentClient()->browser()->GetApplicationLocale();
+  const std::string app_lang_base = l10n_util::GetLanguage(app_lang);
+  // Start with a best score of -1, that way even if none of the criteria
+  // match, something will be returned if there are any voices.
+  int best_score = -1;
+  int best_score_index = -1;
+#if defined(OS_CHROMEOS)
+  TtsControllerDelegate* delegate = GetTtsControllerDelegate();
+  std::unique_ptr<TtsControllerDelegate::PreferredVoiceIds> preferred_ids =
+      delegate ? delegate->GetPreferredVoiceIdsForUtterance(utterance)
+               : nullptr;
+#endif  // defined(OS_CHROMEOS)
+  for (size_t i = 0; i < voices.size(); ++i) {
+    const content::VoiceData& voice = voices[i];
+    int score = 0;
+
+    // If the extension ID is specified, check for an exact match.
+    if (!utterance->GetEngineId().empty() &&
+        utterance->GetEngineId() != voice.engine_id)
+      continue;
+
+    // If the voice name is specified, check for an exact match.
+    if (!utterance->GetVoiceName().empty() &&
+        voice.name != utterance->GetVoiceName())
+      continue;
+
+    // Prefer the utterance language.
+    if (!voice.lang.empty() && !utterance->GetLang().empty()) {
+      // An exact language match is worth more than a partial match.
+      if (voice.lang == utterance->GetLang()) {
+        score += 128;
+      } else if (l10n_util::GetLanguage(voice.lang) ==
+                 l10n_util::GetLanguage(utterance->GetLang())) {
+        score += 64;
+      }
+    }
+
+    // Next, prefer required event types.
+    if (!utterance->GetRequiredEventTypes().empty()) {
+      bool has_all_required_event_types = true;
+      for (TtsEventType event_type : utterance->GetRequiredEventTypes()) {
+        if (voice.events.find(event_type) == voice.events.end()) {
+          has_all_required_event_types = false;
+          break;
+        }
+      }
+      if (has_all_required_event_types)
+        score += 32;
+    }
+
+#if defined(OS_CHROMEOS)
+    if (preferred_ids) {
+      // First prefer the user's preference voice for the utterance language,
+      // if the utterance language is specified.
+      if (!utterance->GetLang().empty() &&
+          VoiceIdMatches(preferred_ids->lang_voice_id, voice)) {
+        score += 16;
+      }
+
+      // Then prefer the user's preference voice for the system language.
+      // This is a lower priority match than the utterance voice.
+      if (VoiceIdMatches(preferred_ids->locale_voice_id, voice))
+        score += 8;
+
+      // Finally, prefer the user's preference voice for any language. This will
+      // pick the default voice if there is no better match for the current
+      // system language and utterance language.
+      if (VoiceIdMatches(preferred_ids->any_locale_voice_id, voice))
+        score += 4;
+    }
+#endif  // defined(OS_CHROMEOS)
+
+    // Finally, prefer system language.
+    if (!voice.lang.empty()) {
+      if (voice.lang == app_lang) {
+        score += 2;
+      } else if (l10n_util::GetLanguage(voice.lang) == app_lang_base) {
+        score += 1;
+      }
+    }
+
+    if (score > best_score) {
+      best_score = score;
+      best_score_index = static_cast<int>(i);
+    }
+  }
+
+  return best_score_index;
+}
+
+void TtsControllerImpl::SetCurrentUtterance(
+    std::unique_ptr<TtsUtterance> utterance) {
+  current_utterance_ = std::move(utterance);
+  TtsUtteranceImpl* impl = AsUtteranceImpl(current_utterance_.get());
+  // Observe the new utterance's WebContents, or nullptr if there is none.
+  Observe(impl ? impl->web_contents() : nullptr);
+}
+
+void TtsControllerImpl::StopCurrentUtteranceAndRemoveUtterancesMatching(
+    WebContents* wc) {
+  DCHECK(wc);
+  // Removes any utterances that match the WebContents from the current
+  // utterance (which our inherited WebContentsObserver starts observing every
+  // time the utterance changes).
+  //
+  // This is called when the WebContents for the current utterance is destroyed
+  // or hidden. In the case where it's destroyed, this is done to avoid
+  // attempting to start an utterance that is very likely to be destroyed right
+  // away, and there are also subtle timing issues if we didn't do this (if a
+  // queued utterance has already received WebContentsDestroyed(), and we start
+  // it, we won't get the corresponding WebContentsDestroyed()).
+  auto eraser = [wc](const std::unique_ptr<TtsUtterance>& utterance) {
+    TtsUtteranceImpl* utterance_impl = AsUtteranceImpl(utterance.get());
+    if (utterance_impl->web_contents() == wc) {
+      utterance_impl->Finish();
+      return true;
+    }
+    return false;
+  };
+  // |utterance_list_| is a std::list: its member remove_if() unlinks the
+  // matching nodes in place, with no erase-remove shuffle of unique_ptrs.
+  utterance_list_.remove_if(eraser);
+  const bool stopped = StopCurrentUtteranceIfMatches(GURL());
+  DCHECK(stopped);
+  SpeakNextUtterance();
+}
+
+bool TtsControllerImpl::ShouldSpeakUtterance(TtsUtterance* utterance) {
+  TtsUtteranceImpl* impl = AsUtteranceImpl(utterance);
+  // Utterances that were not created with a WebContents are always spoken.
+  if (!impl->was_created_with_web_contents())
+    return true;
+
+  // Created with a WebContents that has since been destroyed: don't speak it.
+  WebContents* contents = impl->web_contents();
+  if (!contents)
+    return false;
+
+  // Speak unless hidden pages must stay silent and this WebContents is hidden.
+  return !stop_speaking_when_hidden_ ||
+         contents->GetVisibility() != Visibility::HIDDEN;
+}
+
+//
+// WebContentsObserver
+//
+
+void TtsControllerImpl::WebContentsDestroyed() {
+  StopCurrentUtteranceAndRemoveUtterancesMatching(web_contents());
+}
+
+void TtsControllerImpl::OnVisibilityChanged(Visibility visibility) {
+  if (visibility == Visibility::HIDDEN && stop_speaking_when_hidden_)
+    StopCurrentUtteranceAndRemoveUtterancesMatching(web_contents());
+}
+
+#if defined(OS_CHROMEOS)
+TtsControllerDelegate* TtsControllerImpl::GetTtsControllerDelegate() {
+  // Lazily fetch the delegate from the browser client and cache it in
+  // |delegate_|. If there is no ContentClient or browser client yet, nothing
+  // is cached and null is returned, so a later call will try again.
+  if (!delegate_ && GetContentClient() && GetContentClient()->browser()) {
+    delegate_ = GetContentClient()->browser()->GetTtsControllerDelegate();
+  }
+  return delegate_;
+}
+#endif  // defined(OS_CHROMEOS)
+
 }  // namespace content
diff --git a/chromium/content/browser/speech/tts_controller_impl.h b/chromium/content/browser/speech/tts_controller_impl.h
index 052a8841be9..638c3691d6d 100644
--- a/chromium/content/browser/speech/tts_controller_impl.h
+++ b/chromium/content/browser/speech/tts_controller_impl.h
@@ -5,9 +5,8 @@
 #ifndef CONTENT_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_
 #define CONTENT_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_
 
-#include <deque>
+#include <list>
 #include <memory>
-#include <set>
 #include <string>
 #include <vector>
 
@@ -22,18 +21,23 @@
 #include "build/build_config.h"
 #include "content/common/content_export.h"
 #include "content/public/browser/tts_controller.h"
-#include "content/public/browser/tts_controller_delegate.h"
 #include "content/public/browser/tts_platform.h"
+#include "content/public/browser/web_contents_observer.h"
 #include "services/data_decoder/public/cpp/data_decoder.h"
 #include "url/gurl.h"
 
 namespace content {
 class BrowserContext;
 
+#if defined(OS_CHROMEOS)
+class TtsControllerDelegate;
+#endif
+
 // Singleton class that manages text-to-speech for all TTS engines and
 // APIs, maintaining a queue of pending utterances and keeping
 // track of all state.
-class CONTENT_EXPORT TtsControllerImpl : public TtsController {
+class CONTENT_EXPORT TtsControllerImpl : public TtsController,
+                                         public WebContentsObserver {
  public:
   // Get the single instance of this class.
   static TtsControllerImpl* GetInstance();
@@ -58,6 +62,7 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
   void RemoveUtteranceEventDelegate(UtteranceEventDelegate* delegate) override;
   void SetTtsEngineDelegate(TtsEngineDelegate* delegate) override;
   TtsEngineDelegate* GetTtsEngineDelegate() override;
+  void SetStopSpeakingWhenHidden(bool value) override;
 
   // Called directly by ~BrowserContext, because a raw BrowserContext pointer
   // is stored in an Utterance.
@@ -77,6 +82,7 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
   ~TtsControllerImpl() override;
 
  private:
+  friend class TtsControllerTestHelper;
   FRIEND_TEST_ALL_PREFIXES(TtsControllerTest, TestTtsControllerShutdown);
   FRIEND_TEST_ALL_PREFIXES(TtsControllerTest, TestGetMatchingVoice);
   FRIEND_TEST_ALL_PREFIXES(TtsControllerTest,
@@ -92,7 +98,13 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
   // |utterance| or delete it if there's an error. Returns true on success.
   void SpeakNow(std::unique_ptr<TtsUtterance> utterance);
 
-  void StopInternal(const GURL& source_url);
+  // If the current utterance matches |source_url|, it is stopped and the
+  // utterance queue cleared.
+  void StopAndClearQueue(const GURL& source_url);
+
+  // Stops the current utterance if it matches |source_url|. Returns true on
+  // success, false if the current utterance does not match |source_url|.
+  bool StopCurrentUtteranceIfMatches(const GURL& source_url);
 
   // Clear the utterance queue. If send_events is true, will send
   // TTS_EVENT_CANCELLED events on each one.
@@ -120,9 +132,31 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
   static void PopulateParsedText(std::string* parsed_text,
                                  const base::Value* element);
 
+  int GetMatchingVoice(TtsUtterance* utterance,
+                       const std::vector<VoiceData>& voices);
+
+  // Called internally to set |current_utterance_|.
+  void SetCurrentUtterance(std::unique_ptr<TtsUtterance> utterance);
+
+  // Used when the WebContents of the current utterance is destroyed/hidden.
+  void StopCurrentUtteranceAndRemoveUtterancesMatching(WebContents* wc);
+
+  // Returns true if the utterance should be spoken.
+  bool ShouldSpeakUtterance(TtsUtterance* utterance);
+
+  // WebContentsObserver methods
+  void WebContentsDestroyed() override;
+  void OnVisibilityChanged(Visibility visibility) override;
+
+#if defined(OS_CHROMEOS)
   TtsControllerDelegate* GetTtsControllerDelegate();
 
-  TtsControllerDelegate* delegate_;
+  TtsControllerDelegate* delegate_ = nullptr;
+#endif
+
+  TtsEngineDelegate* engine_delegate_ = nullptr;
+
+  bool stop_speaking_when_hidden_ = false;
 
   // A set of delegates that want to be notified when the voices change.
   base::ObserverList<VoicesChangedDelegate> voices_changed_delegates_;
@@ -131,14 +165,14 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
   std::unique_ptr<TtsUtterance> current_utterance_;
 
   // Whether the queue is paused or not.
-  bool paused_;
+  bool paused_ = false;
 
   // A pointer to the platform implementation of text-to-speech, for
   // dependency injection.
-  TtsPlatform* tts_platform_;
+  TtsPlatform* tts_platform_ = nullptr;
 
   // A queue of utterances to speak after the current one finishes.
-  std::deque<std::unique_ptr<TtsUtterance>> utterance_deque_;
+  std::list<std::unique_ptr<TtsUtterance>> utterance_list_;
 
   DISALLOW_COPY_AND_ASSIGN(TtsControllerImpl);
 };
diff --git a/chromium/content/browser/speech/tts_controller_unittest.cc b/chromium/content/browser/speech/tts_controller_unittest.cc
index 2282f8db739..593814ad203 100644
--- a/chromium/content/browser/speech/tts_controller_unittest.cc
+++ b/chromium/content/browser/speech/tts_controller_unittest.cc
@@ -4,25 +4,39 @@
 
 // Unit tests for the TTS Controller.
 
+#include "content/browser/speech/tts_controller_impl.h"
+
 #include "base/memory/ptr_util.h"
 #include "base/values.h"
-#include "content/browser/speech/tts_controller_impl.h"
-#include "content/public/browser/tts_controller_delegate.h"
+#include "content/browser/speech/tts_utterance_impl.h"
 #include "content/public/browser/tts_platform.h"
+#include "content/public/browser/visibility.h"
 #include "content/public/test/browser_task_environment.h"
 #include "content/public/test/test_browser_context.h"
+#include "content/public/test/test_renderer_host.h"
+#include "content/test/test_content_browser_client.h"
+#include "content/test/test_web_contents.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
 
-namespace content {
+#if defined(OS_CHROMEOS)
+#include "content/public/browser/tts_controller_delegate.h"
+#endif
 
-class TtsControllerTest : public testing::Test {};
+namespace content {
 
 // Platform Tts implementation that does nothing.
 class MockTtsPlatformImpl : public TtsPlatform {
  public:
-  MockTtsPlatformImpl() {}
-  virtual ~MockTtsPlatformImpl() {}
+  MockTtsPlatformImpl() = default;
+  virtual ~MockTtsPlatformImpl() = default;
+
+  void set_voices(const std::vector<VoiceData>& voices) { voices_ = voices; }
+
+  void set_run_speak_callback(bool value) { run_speak_callback_ = value; }
+  void set_is_speaking(bool value) { is_speaking_ = value; }
+
+  // TtsPlatform:
   bool PlatformImplAvailable() override { return true; }
   void Speak(int utterance_id,
              const std::string& utterance,
@@ -30,13 +44,16 @@ class MockTtsPlatformImpl : public TtsPlatform {
              const VoiceData& voice,
              const UtteranceContinuousParameters& params,
              base::OnceCallback<void(bool)> on_speak_finished) override {
-    std::move(on_speak_finished).Run(true);
+    if (run_speak_callback_)
+      std::move(on_speak_finished).Run(true);
   }
-  bool IsSpeaking() override { return false; }
+  bool IsSpeaking() override { return is_speaking_; }
   bool StopSpeaking() override { return true; }
   void Pause() override {}
   void Resume() override {}
-  void GetVoices(std::vector<VoiceData>* out_voices) override {}
+  void GetVoices(std::vector<VoiceData>* out_voices) override {
+    *out_voices = voices_;
+  }
   bool LoadBuiltInTtsEngine(BrowserContext* browser_context) override {
     return false;
   }
@@ -45,12 +62,20 @@ class MockTtsPlatformImpl : public TtsPlatform {
   void SetError(const std::string& error) override {}
   std::string GetError() override { return std::string(); }
   void ClearError() override {}
+
+ private:
+  std::vector<VoiceData> voices_;
+  bool run_speak_callback_ = true;
+  bool is_speaking_ = false;
 };
 
+#if defined(OS_CHROMEOS)
 class MockTtsControllerDelegate : public TtsControllerDelegate {
  public:
-  MockTtsControllerDelegate() {}
-  ~MockTtsControllerDelegate() override {}
+  MockTtsControllerDelegate() = default;
+  ~MockTtsControllerDelegate() override = default;
+
+  void SetPreferredVoiceIds(const PreferredVoiceIds& ids) { ids_ = ids; }
 
   BrowserContext* GetLastBrowserContext() {
     BrowserContext* result = last_browser_context_;
@@ -58,10 +83,12 @@ class MockTtsControllerDelegate : public TtsControllerDelegate {
     return result;
   }
 
-  int GetMatchingVoice(content::TtsUtterance* utterance,
-                       std::vector<content::VoiceData>& voices) override {
+  // TtsControllerDelegate:
+  std::unique_ptr<PreferredVoiceIds> GetPreferredVoiceIdsForUtterance(
+      TtsUtterance* utterance) override {
     last_browser_context_ = utterance->GetBrowserContext();
-    return -1;
+    auto ids = std::make_unique<PreferredVoiceIds>(ids_);
+    return ids;
   }
 
   void UpdateUtteranceDefaultsFromPrefs(content::TtsUtterance* utterance,
@@ -69,15 +96,11 @@ class MockTtsControllerDelegate : public TtsControllerDelegate {
                                         double* pitch,
                                         double* volume) override {}
 
-  void SetTtsEngineDelegate(content::TtsEngineDelegate* delegate) override {}
-
-  content::TtsEngineDelegate* GetTtsEngineDelegate() override {
-    return nullptr;
-  }
-
  private:
   BrowserContext* last_browser_context_ = nullptr;
+  PreferredVoiceIds ids_;
 };
+#endif
 
 // Subclass of TtsController with a public ctor and dtor.
 class TtsControllerForTesting : public TtsControllerImpl {
@@ -86,11 +109,14 @@ class TtsControllerForTesting : public TtsControllerImpl {
   ~TtsControllerForTesting() override {}
 };
 
-TEST_F(TtsControllerTest, TestTtsControllerShutdown) {
+TEST(TtsControllerTest, TestTtsControllerShutdown) {
   MockTtsPlatformImpl platform_impl;
-  TtsControllerForTesting* controller = new TtsControllerForTesting();
-  MockTtsControllerDelegate* delegate = new MockTtsControllerDelegate();
-  controller->delegate_ = delegate;
+  std::unique_ptr<TtsControllerForTesting> controller =
+      std::make_unique<TtsControllerForTesting>();
+#if defined(OS_CHROMEOS)
+  MockTtsControllerDelegate delegate;
+  controller->delegate_ = &delegate;
+#endif
 
   controller->SetTtsPlatform(&platform_impl);
 
@@ -106,13 +132,11 @@ TEST_F(TtsControllerTest, TestTtsControllerShutdown) {
 
   // Make sure that deleting the controller when there are pending
   // utterances doesn't cause a crash.
-  delete controller;
-
-  // Clean up.
-  delete delegate;
+  controller.reset();
 }
 
-TEST_F(TtsControllerTest, TestBrowserContextRemoved) {
+#if defined(OS_CHROMEOS)
+TEST(TtsControllerTest, TestBrowserContextRemoved) {
   // Create a controller, mock other stuff, and create a test
   // browser context.
   TtsControllerImpl* controller = TtsControllerImpl::GetInstance();
@@ -123,9 +147,17 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) {
   content::BrowserTaskEnvironment task_environment;
   auto browser_context = std::make_unique<TestBrowserContext>();
 
+  std::vector<VoiceData> voices;
+  VoiceData voice_data;
+  voice_data.engine_id = "x";
+  voice_data.events.insert(TTS_EVENT_END);
+  voices.push_back(voice_data);
+  platform_impl.set_voices(voices);
+
   // Speak an utterances associated with this test browser context.
   std::unique_ptr<TtsUtterance> utterance1 =
       TtsUtterance::Create(browser_context.get());
+  utterance1->SetEngineId("x");
   utterance1->SetCanEnqueue(true);
   utterance1->SetSrcId(1);
   controller->SpeakOrEnqueue(std::move(utterance1));
@@ -137,6 +169,7 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) {
   // this browser context.
   std::unique_ptr<TtsUtterance> utterance2 =
       TtsUtterance::Create(browser_context.get());
+  utterance2->SetEngineId("x");
   utterance2->SetCanEnqueue(true);
   utterance2->SetSrcId(2);
   controller->SpeakOrEnqueue(std::move(utterance2));
@@ -150,9 +183,8 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) {
   controller->SpeakNextUtterance();
   ASSERT_EQ(nullptr, delegate.GetLastBrowserContext());
 }
-
-#if !defined(OS_CHROMEOS)
-TEST_F(TtsControllerTest, TestTtsControllerUtteranceDefaults) {
+#else
+TEST(TtsControllerTest, TestTtsControllerUtteranceDefaults) {
   std::unique_ptr<TtsControllerForTesting> controller =
       std::make_unique<TtsControllerForTesting>();
 
@@ -175,6 +207,337 @@ TEST_F(TtsControllerTest, TestTtsControllerUtteranceDefaults) {
   EXPECT_EQ(blink::mojom::kSpeechSynthesisDefaultVolume,
             utterance1->GetContinuousParameters().volume);
 }
-#endif  // !defined(OS_CHROMEOS)
+#endif
+
+TEST(TtsControllerTest, TestGetMatchingVoice) {
+  std::unique_ptr<TtsControllerForTesting> controller =
+      std::make_unique<TtsControllerForTesting>();
+#if defined(OS_CHROMEOS)
+  MockTtsControllerDelegate delegate;
+  controller->delegate_ = &delegate;
+#endif
+
+  TestContentBrowserClient::GetInstance()->set_application_locale("en");
+
+  {
+    // Calling GetMatchingVoice with no voices returns -1.
+    std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+    std::vector<VoiceData> voices;
+    EXPECT_EQ(-1, controller->GetMatchingVoice(utterance.get(), voices));
+  }
+
+  {
+    // Calling GetMatchingVoice with any voices returns the first one
+    // even if there are no criteria that match.
+    std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+    std::vector<VoiceData> voices(2);
+    EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices));
+  }
+
+  {
+    // If nothing else matches, the English voice is returned.
+    // (In tests the language will always be English.)
+    std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+    std::vector<VoiceData> voices;
+    VoiceData fr_voice;
+    fr_voice.lang = "fr";
+    voices.push_back(fr_voice);
+    VoiceData en_voice;
+    en_voice.lang = "en";
+    voices.push_back(en_voice);
+    VoiceData de_voice;
+    de_voice.lang = "de";
+    voices.push_back(de_voice);
+    EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices));
+  }
+
+  {
+    // Check precedence of various matching criteria.
+    std::vector<VoiceData> voices;
+    VoiceData voice0;
+    voices.push_back(voice0);
+    VoiceData voice1;
+    voice1.events.insert(TTS_EVENT_WORD);
+    voices.push_back(voice1);
+    VoiceData voice2;
+    voice2.lang = "de-DE";
+    voices.push_back(voice2);
+    VoiceData voice3;
+    voice3.lang = "fr-CA";
+    voices.push_back(voice3);
+    VoiceData voice4;
+    voice4.name = "Voice4";
+    voices.push_back(voice4);
+    VoiceData voice5;
+    voice5.engine_id = "id5";
+    voices.push_back(voice5);
+    VoiceData voice6;
+    voice6.engine_id = "id7";
+    voice6.name = "Voice6";
+    voice6.lang = "es-es";
+    voices.push_back(voice6);
+    VoiceData voice7;
+    voice7.engine_id = "id7";
+    voice7.name = "Voice7";
+    voice7.lang = "es-mx";
+    voices.push_back(voice7);
+    VoiceData voice8;
+    voice8.engine_id = "";
+    voice8.name = "Android";
+    voice8.lang = "";
+    voice8.native = true;
+    voices.push_back(voice8);
+
+    std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+    EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices));
+
+    std::set<TtsEventType> types;
+    types.insert(TTS_EVENT_WORD);
+    utterance->SetRequiredEventTypes(types);
+    EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices));
+
+    utterance->SetLang("de-DE");
+    EXPECT_EQ(2, controller->GetMatchingVoice(utterance.get(), voices));
+
+    utterance->SetLang("fr-FR");
+    EXPECT_EQ(3, controller->GetMatchingVoice(utterance.get(), voices));
+
+    utterance->SetVoiceName("Voice4");
+    EXPECT_EQ(4, controller->GetMatchingVoice(utterance.get(), voices));
+
+    utterance->SetVoiceName("");
+    utterance->SetEngineId("id5");
+    EXPECT_EQ(5, controller->GetMatchingVoice(utterance.get(), voices));
+
+#if defined(OS_CHROMEOS)
+    TtsControllerDelegate::PreferredVoiceIds preferred_voice_ids;
+    preferred_voice_ids.locale_voice_id.emplace("Voice7", "id7");
+    preferred_voice_ids.any_locale_voice_id.emplace("Android", "");
+    delegate.SetPreferredVoiceIds(preferred_voice_ids);
+
+    // Voice6 is matched when the utterance locale exactly matches its locale.
+    utterance->SetEngineId("");
+    utterance->SetLang("es-es");
+    EXPECT_EQ(6, controller->GetMatchingVoice(utterance.get(), voices));
+
+    // The 7th voice is the default for "es", even though the utterance is
+    // "es-ar". |voice6| is not matched because it is not the default.
+    utterance->SetEngineId("");
+    utterance->SetLang("es-ar");
+    EXPECT_EQ(7, controller->GetMatchingVoice(utterance.get(), voices));
+
+    // The 8th voice is like the built-in "Android" voice, it has no lang
+    // and no extension ID. Make sure it can still be matched.
+    preferred_voice_ids.locale_voice_id.reset();
+    delegate.SetPreferredVoiceIds(preferred_voice_ids);
+    utterance->SetVoiceName("Android");
+    utterance->SetEngineId("");
+    utterance->SetLang("");
+    EXPECT_EQ(8, controller->GetMatchingVoice(utterance.get(), voices));
+
+    delegate.SetPreferredVoiceIds({});
+#endif
+  }
+
+  {
+    // Check voices against system language.
+    std::vector<VoiceData> voices;
+    VoiceData voice0;
+    voice0.engine_id = "id0";
+    voice0.name = "voice0";
+    voice0.lang = "en-GB";
+    voices.push_back(voice0);
+    VoiceData voice1;
+    voice1.engine_id = "id1";
+    voice1.name = "voice1";
+    voice1.lang = "en-US";
+    voices.push_back(voice1);
+    std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+
+    // voice1 is matched against the exact default system language.
+    TestContentBrowserClient::GetInstance()->set_application_locale("en-US");
+    utterance->SetLang("");
+    EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices));
+
+#if defined(OS_CHROMEOS)
+    // voice0 is matched against the system language which has no region piece.
+    TestContentBrowserClient::GetInstance()->set_application_locale("en");
+    EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices));
+
+    TtsControllerDelegate::PreferredVoiceIds preferred_voice_ids2;
+    preferred_voice_ids2.locale_voice_id.emplace("voice0", "id0");
+    delegate.SetPreferredVoiceIds(preferred_voice_ids2);
+    // voice0 is matched against the pref over the system language.
+    TestContentBrowserClient::GetInstance()->set_application_locale("en-US");
+    EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices));
+#endif
+  }
+}
+
+class TtsControllerTestHelper {
+ public:
+  TtsControllerTestHelper() {
+    controller_.SetTtsPlatform(&platform_impl_);
+    // This ensures utterances don't immediately complete.
+    platform_impl_.set_run_speak_callback(false);
+    platform_impl_.set_is_speaking(true);
+  }
+
+  std::unique_ptr<TestWebContents> CreateWebContents() {
+    return std::unique_ptr<TestWebContents>(
+        TestWebContents::Create(&browser_context_, nullptr));
+  }
+
+  std::unique_ptr<TtsUtteranceImpl> CreateUtterance(WebContents* web_contents) {
+    return std::make_unique<TtsUtteranceImpl>(&browser_context_, web_contents);
+  }
+
+  MockTtsPlatformImpl* platform_impl() { return &platform_impl_; }
+
+  TtsControllerForTesting* controller() { return &controller_; }
+
+  TtsUtterance* TtsControllerCurrentUtterance() {
+    return controller_.current_utterance_.get();
+  }
+
+  bool IsUtteranceListEmpty() { return controller_.utterance_list_.empty(); }
+
+ private:
+  content::BrowserTaskEnvironment task_environment_;
+  RenderViewHostTestEnabler rvh_enabler_;
+  TestBrowserContext browser_context_;
+  MockTtsPlatformImpl platform_impl_;
+  TtsControllerForTesting controller_;
+};
+
+TEST(TtsControllerTest, StopsWhenWebContentsDestroyed) {
+  TtsControllerTestHelper helper;
+  std::unique_ptr<WebContents> web_contents = helper.CreateWebContents();
+  std::unique_ptr<TtsUtteranceImpl> utterance =
+      helper.CreateUtterance(web_contents.get());
+
+  helper.controller()->SpeakOrEnqueue(std::move(utterance));
+  EXPECT_TRUE(helper.controller()->IsSpeaking());
+  EXPECT_TRUE(helper.TtsControllerCurrentUtterance());
+
+  web_contents.reset();
+  // Destroying the WebContents should reset
+  // |TtsController::current_utterance_|.
+  EXPECT_FALSE(helper.TtsControllerCurrentUtterance());
+}
+
+TEST(TtsControllerTest, StartsQueuedUtteranceWhenWebContentsDestroyed) {
+  TtsControllerTestHelper helper;
+  std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents();
+  std::unique_ptr<WebContents> web_contents2 = helper.CreateWebContents();
+  std::unique_ptr<TtsUtteranceImpl> utterance1 =
+      helper.CreateUtterance(web_contents1.get());
+  void* raw_utterance1 = utterance1.get();
+  std::unique_ptr<TtsUtteranceImpl> utterance2 =
+      helper.CreateUtterance(web_contents2.get());
+  utterance2->SetCanEnqueue(true);
+  void* raw_utterance2 = utterance2.get();
+
+  helper.controller()->SpeakOrEnqueue(std::move(utterance1));
+  EXPECT_TRUE(helper.controller()->IsSpeaking());
+  EXPECT_TRUE(helper.TtsControllerCurrentUtterance());
+  helper.controller()->SpeakOrEnqueue(std::move(utterance2));
+  EXPECT_EQ(raw_utterance1, helper.TtsControllerCurrentUtterance());
+
+  web_contents1.reset();
+  // Destroying |web_contents1| should delete |utterance1| and start
+  // |utterance2|.
+  EXPECT_TRUE(helper.TtsControllerCurrentUtterance());
+  EXPECT_EQ(raw_utterance2, helper.TtsControllerCurrentUtterance());
+}
+
+TEST(TtsControllerTest, StartsQueuedUtteranceWhenWebContentsDestroyed2) {
+  TtsControllerTestHelper helper;
+  std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents();
+  std::unique_ptr<WebContents> web_contents2 = helper.CreateWebContents();
+  std::unique_ptr<TtsUtteranceImpl> utterance1 =
+      helper.CreateUtterance(web_contents1.get());
+  void* raw_utterance1 = utterance1.get();
+  std::unique_ptr<TtsUtteranceImpl> utterance2 =
+      helper.CreateUtterance(web_contents1.get());
+  std::unique_ptr<TtsUtteranceImpl> utterance3 =
+      helper.CreateUtterance(web_contents2.get());
+  void* raw_utterance3 = utterance3.get();
+  utterance2->SetCanEnqueue(true);
+  utterance3->SetCanEnqueue(true);
+
+  helper.controller()->SpeakOrEnqueue(std::move(utterance1));
+  helper.controller()->SpeakOrEnqueue(std::move(utterance2));
+  helper.controller()->SpeakOrEnqueue(std::move(utterance3));
+  EXPECT_TRUE(helper.controller()->IsSpeaking());
+  EXPECT_EQ(raw_utterance1, helper.TtsControllerCurrentUtterance());
+
+  web_contents1.reset();
+  // Deleting |web_contents1| should delete |utterance1| and |utterance2| as
+  // they are both from |web_contents1|. |raw_utterance3| should be made the
+  // current as it's from a different WebContents.
+  EXPECT_EQ(raw_utterance3, helper.TtsControllerCurrentUtterance());
+  EXPECT_TRUE(helper.IsUtteranceListEmpty());
+
+  web_contents2.reset();
+  // Deleting |web_contents2| should delete |utterance3|, the only remaining
+  // utterance, as it was created from |web_contents2|.
+  EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance());
+}
+
+TEST(TtsControllerTest, StartsUtteranceWhenWebContentsHidden) {
+  TtsControllerTestHelper helper;
+  std::unique_ptr<TestWebContents> web_contents = helper.CreateWebContents();
+  web_contents->SetVisibilityAndNotifyObservers(Visibility::HIDDEN);
+  std::unique_ptr<TtsUtteranceImpl> utterance =
+      helper.CreateUtterance(web_contents.get());
+  helper.controller()->SpeakOrEnqueue(std::move(utterance));
+  EXPECT_TRUE(helper.controller()->IsSpeaking());
+}
+
+TEST(TtsControllerTest,
+     DoesNotStartUtteranceWhenWebContentsHiddenAndStopSpeakingWhenHiddenSet) {
+  TtsControllerTestHelper helper;
+  std::unique_ptr<TestWebContents> web_contents = helper.CreateWebContents();
+  web_contents->SetVisibilityAndNotifyObservers(Visibility::HIDDEN);
+  std::unique_ptr<TtsUtteranceImpl> utterance =
+      helper.CreateUtterance(web_contents.get());
+  helper.controller()->SetStopSpeakingWhenHidden(true);
+  helper.controller()->SpeakOrEnqueue(std::move(utterance));
+  EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance());
+  EXPECT_TRUE(helper.IsUtteranceListEmpty());
+}
+
+TEST(TtsControllerTest, SkipsQueuedUtteranceFromHiddenWebContents) {
+  TtsControllerTestHelper helper;
+  helper.controller()->SetStopSpeakingWhenHidden(true);
+  std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents();
+  std::unique_ptr<TestWebContents> web_contents2 = helper.CreateWebContents();
+  std::unique_ptr<TtsUtteranceImpl> utterance1 =
+      helper.CreateUtterance(web_contents1.get());
+  const int utterance1_id = utterance1->GetId();
+  std::unique_ptr<TtsUtteranceImpl> utterance2 =
+      helper.CreateUtterance(web_contents2.get());
+  utterance2->SetCanEnqueue(true);
+
+  helper.controller()->SpeakOrEnqueue(std::move(utterance1));
+  EXPECT_TRUE(helper.TtsControllerCurrentUtterance());
+  EXPECT_TRUE(helper.IsUtteranceListEmpty());
+
+  // Speak |utterance2|, which should get queued.
+  helper.controller()->SpeakOrEnqueue(std::move(utterance2));
+  EXPECT_FALSE(helper.IsUtteranceListEmpty());
+
+  // Hide the second WebContents. |utterance2| is only queued, so this
+  // shouldn't change anything in TtsController.
+  web_contents2->SetVisibilityAndNotifyObservers(Visibility::HIDDEN);
+  EXPECT_FALSE(helper.IsUtteranceListEmpty());
+
+  // Finish |utterance1|, which should skip |utterance2| because |web_contents2|
+  // is hidden.
+  helper.controller()->OnTtsEvent(utterance1_id, TTS_EVENT_END, 0, 0, {});
+  EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance());
+  EXPECT_TRUE(helper.IsUtteranceListEmpty());
+}
 
 }  // namespace content
diff --git a/chromium/content/browser/speech/tts_linux.cc b/chromium/content/browser/speech/tts_linux.cc
index 2c6d2d03133..afd36df9ed3 100644
--- a/chromium/content/browser/speech/tts_linux.cc
+++ b/chromium/content/browser/speech/tts_linux.cc
@@ -14,7 +14,6 @@
 #include "base/macros.h"
 #include "base/memory/singleton.h"
 #include "base/synchronization/lock.h"
-#include "base/task/post_task.h"
 #include "base/task/thread_pool.h"
 #include "content/browser/speech/tts_platform_impl.h"
 #include "content/public/browser/browser_task_traits.h"
@@ -128,7 +127,7 @@ void TtsPlatformImplLinux::Initialize() {
     // spd_open has memory leaks which are hard to suppress.
     // http://crbug.com/317360
     ANNOTATE_SCOPED_MEMORY_LEAK;
-    conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", NULL,
+    conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr,
                                         SPD_MODE_THREADED);
   }
   if (!conn_)
@@ -151,7 +150,7 @@ TtsPlatformImplLinux::~TtsPlatformImplLinux() {
   base::AutoLock lock(initialization_lock_);
   if (conn_) {
     libspeechd_loader_.spd_close(conn_);
-    conn_ = NULL;
+    conn_ = nullptr;
   }
 }
 
@@ -159,14 +158,14 @@ void TtsPlatformImplLinux::Reset() {
   base::AutoLock lock(initialization_lock_);
   if (conn_)
     libspeechd_loader_.spd_close(conn_);
-  conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", NULL,
+  conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr,
                                       SPD_MODE_THREADED);
 }
 
 bool TtsPlatformImplLinux::PlatformImplAvailable() {
   if (!initialization_lock_.Try())
     return false;
-  bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
+  bool result = libspeechd_loader_.loaded() && (conn_ != nullptr);
   initialization_lock_.Release();
   return result;
 }
@@ -345,8 +344,8 @@ void TtsPlatformImplLinux::NotificationCallback(size_t msg_id,
   // be in a separate thread.
   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
     current_notification_ = type;
-    base::PostTask(
-        FROM_HERE, {BrowserThread::UI},
+    GetUIThreadTaskRunner({})->PostTask(
+        FROM_HERE,
         base::BindOnce(&TtsPlatformImplLinux::OnSpeechEvent,
                        base::Unretained(TtsPlatformImplLinux::GetInstance()),
                        type));
@@ -365,8 +364,8 @@ void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
   // be in a separate thread.
   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
     current_notification_ = state;
-    base::PostTask(
-        FROM_HERE, {BrowserThread::UI},
+    GetUIThreadTaskRunner({})->PostTask(
+        FROM_HERE,
         base::BindOnce(&TtsPlatformImplLinux::OnSpeechEvent,
                        base::Unretained(TtsPlatformImplLinux::GetInstance()),
                        state));
diff --git a/chromium/content/browser/speech/tts_utterance_impl.cc b/chromium/content/browser/speech/tts_utterance_impl.cc
index 34ff42b55c5..aa6759c3973 100644
--- a/chromium/content/browser/speech/tts_utterance_impl.cc
+++ b/chromium/content/browser/speech/tts_utterance_impl.cc
@@ -3,6 +3,7 @@
 // found in the LICENSE file.
 
 #include "content/browser/speech/tts_utterance_impl.h"
+
 #include "base/values.h"
 #include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
 
@@ -37,11 +38,14 @@ int TtsUtteranceImpl::next_utterance_id_ = 0;
 
 std::unique_ptr<TtsUtterance> TtsUtterance::Create(
     BrowserContext* browser_context) {
-  return std::make_unique<TtsUtteranceImpl>(browser_context);
+  return std::make_unique<TtsUtteranceImpl>(browser_context, nullptr);
 }
 
-TtsUtteranceImpl::TtsUtteranceImpl(BrowserContext* browser_context)
-    : browser_context_(browser_context),
+TtsUtteranceImpl::TtsUtteranceImpl(BrowserContext* browser_context,
+                                   WebContents* web_contents)
+    : WebContentsObserver(web_contents),
+      browser_context_(browser_context),
+      was_created_with_web_contents_(web_contents != nullptr),
       id_(next_utterance_id_++),
       src_id_(-1),
       can_enqueue_(false),
diff --git a/chromium/content/browser/speech/tts_utterance_impl.h b/chromium/content/browser/speech/tts_utterance_impl.h
index fc73f7c7570..2b54961acbd 100644
--- a/chromium/content/browser/speech/tts_utterance_impl.h
+++ b/chromium/content/browser/speech/tts_utterance_impl.h
@@ -5,22 +5,32 @@
 #ifndef CONTENT_BROWSER_SPEECH_TTS_UTTERANCE_IMPL_H_
 #define CONTENT_BROWSER_SPEECH_TTS_UTTERANCE_IMPL_H_
 
+#include <memory>
 #include <set>
 #include <string>
 
-#include "base/values.h"
-#include "content/public/browser/tts_controller.h"
 #include "content/public/browser/tts_utterance.h"
+#include "content/public/browser/web_contents_observer.h"
+
+namespace base {
+class Value;
+}
 
 namespace content {
 class BrowserContext;
+class WebContents;
 
 // Implementation of TtsUtterance.
-class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance {
+class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance,
+                                        public WebContentsObserver {
  public:
-  TtsUtteranceImpl(BrowserContext* browser_context);
+  TtsUtteranceImpl(BrowserContext* browser_context, WebContents* web_contents);
   ~TtsUtteranceImpl() override;
 
+  bool was_created_with_web_contents() const {
+    return was_created_with_web_contents_;
+  }
+
   // TtsUtterance overrides.
   void OnTtsEvent(TtsEventType event_type,
                   int char_index,
@@ -77,6 +87,9 @@ class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance {
   // The BrowserContext that initiated this utterance.
   BrowserContext* browser_context_;
 
+  // True if the constructor was supplied with a WebContents.
+  const bool was_created_with_web_contents_;
+
   // The content embedder engine ID of the engine providing TTS for this
   // utterance, or empty if native TTS is being used.
   std::string engine_id_;