summary refs log tree commit diff
path: root/chromium/content/browser/speech
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/content/browser/speech')
-rw-r--r-- chromium/content/browser/speech/DEPS 8
-rw-r--r-- chromium/content/browser/speech/proto/BUILD.gn 9
-rw-r--r-- chromium/content/browser/speech/proto/google_streaming_api.proto 76
-rw-r--r-- chromium/content/browser/speech/speech_recognition_browsertest.cc 11
-rw-r--r-- chromium/content/browser/speech/speech_recognition_dispatcher_host.cc 17
-rw-r--r-- chromium/content/browser/speech/speech_recognition_engine.cc 203
-rw-r--r-- chromium/content/browser/speech/speech_recognition_engine.h 26
-rw-r--r-- chromium/content/browser/speech/speech_recognition_engine_unittest.cc 2
-rw-r--r-- chromium/content/browser/speech/speech_recognition_manager_impl.cc 40
-rw-r--r-- chromium/content/browser/speech/speech_recognizer.h 2
-rw-r--r-- chromium/content/browser/speech/speech_recognizer_impl.cc 34
-rw-r--r-- chromium/content/browser/speech/speech_recognizer_impl_android.cc 45
-rw-r--r-- chromium/content/browser/speech/speech_recognizer_impl_unittest.cc 2
-rw-r--r-- chromium/content/browser/speech/speech_synthesis_impl.cc 12
-rw-r--r-- chromium/content/browser/speech/speech_synthesis_impl.h 6
-rw-r--r-- chromium/content/browser/speech/tts_controller_impl.cc 351
-rw-r--r-- chromium/content/browser/speech/tts_controller_impl.h 52
-rw-r--r-- chromium/content/browser/speech/tts_controller_unittest.cc 429
-rw-r--r-- chromium/content/browser/speech/tts_linux.cc 17
-rw-r--r-- chromium/content/browser/speech/tts_utterance_impl.cc 10
-rw-r--r-- chromium/content/browser/speech/tts_utterance_impl.h 21
21 files changed, 854 insertions, 519 deletions
diff --git a/chromium/content/browser/speech/DEPS b/chromium/content/browser/speech/DEPS
index d3ee893ebac..7c726080d23 100644
--- a/chromium/content/browser/speech/DEPS
+++ b/chromium/content/browser/speech/DEPS
@@ -1,3 +1,11 @@
include_rules = [
+ "+components/speech",
"+google_apis", # Exception to general rule, see content/DEPS for details.
]
+
+specific_include_rules = {
+ "tts_controller_impl\.cc": [
+ # TtsControllerImpl uses GetLanguage(), which is not grd related.
+ "+ui/base/l10n/l10n_util.h",
+ ],
+}
diff --git a/chromium/content/browser/speech/proto/BUILD.gn b/chromium/content/browser/speech/proto/BUILD.gn
deleted file mode 100644
index dafd61f78ee..00000000000
--- a/chromium/content/browser/speech/proto/BUILD.gn
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import("//third_party/protobuf/proto_library.gni")
-
-proto_library("proto") {
- sources = [ "google_streaming_api.proto" ]
-}
diff --git a/chromium/content/browser/speech/proto/google_streaming_api.proto b/chromium/content/browser/speech/proto/google_streaming_api.proto
deleted file mode 100644
index ce1b8d98a49..00000000000
--- a/chromium/content/browser/speech/proto/google_streaming_api.proto
+++ /dev/null
@@ -1,76 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-syntax = "proto2";
-option optimize_for = LITE_RUNTIME;
-
-// TODO(hans): Commented out due to compilation errors.
-// option cc_api_version = 2;
-
-package content.proto;
-
-// SpeechRecognitionEvent is the only message type sent to client.
-//
-// The first SpeechRecognitionEvent is an empty (default) message to indicate
-// as early as possible that the stream connection has been established.
-message SpeechRecognitionEvent {
- enum StatusCode {
- // Note: in JavaScript API SpeechRecognitionError 0 is "OTHER" error.
- STATUS_SUCCESS = 0;
- STATUS_NO_SPEECH = 1;
- STATUS_ABORTED = 2;
- STATUS_AUDIO_CAPTURE = 3;
- STATUS_NETWORK = 4;
- STATUS_NOT_ALLOWED = 5;
- STATUS_SERVICE_NOT_ALLOWED = 6;
- STATUS_BAD_GRAMMAR = 7;
- STATUS_LANGUAGE_NOT_SUPPORTED = 8;
- }
- optional StatusCode status = 1 [default = STATUS_SUCCESS];
-
- // May contain zero or one final=true result (the newly settled portion).
- // May also contain zero or more final=false results.
- // (Note that this differs from JavaScript API resultHistory in that no more
- // than one final=true result is returned, so client must accumulate
- // resultHistory by concatenating the final=true results.)
- repeated SpeechRecognitionResult result = 2;
-
- enum EndpointerEventType {
- START_OF_SPEECH = 0;
- END_OF_SPEECH = 1;
- END_OF_AUDIO = 2; // End of audio stream has been reached.
- // End of utterance indicates that no more speech segments are expected.
- END_OF_UTTERANCE = 3;
- }
-
- optional EndpointerEventType endpoint = 4;
-};
-
-message SpeechRecognitionResult {
- repeated SpeechRecognitionAlternative alternative = 1;
-
- // True if this is the final time the speech service will return this
- // particular SpeechRecognitionResult. If false, then this represents an
- // interim result that could still be changed.
- optional bool final = 2 [default = false];
-
- // An estimate of the probability that the recognizer will not change its
- // guess about this interim result. Values range from 0.0 (completely
- // unstable) to 1.0 (completely stable). Note that this is not the same as
- // "confidence", which estimate the probability that a recognition result
- // is correct. This field is only provided for interim (final=false) results.
- optional float stability = 3;
-};
-
-// Item in N-best list.
-message SpeechRecognitionAlternative {
- // Spoken text.
- optional string transcript = 1;
-
- // The confidence estimate between 0.0 and 1.0. A higher number means the
- // system is more confident that the recognition is correct.
- // This field is typically provided only for the top hypothesis and only for
- // final results.
- optional float confidence = 2;
-}
diff --git a/chromium/content/browser/speech/speech_recognition_browsertest.cc b/chromium/content/browser/speech/speech_recognition_browsertest.cc
index bb688ed2d2e..cf1dc5cbf2c 100644
--- a/chromium/content/browser/speech/speech_recognition_browsertest.cc
+++ b/chromium/content/browser/speech/speech_recognition_browsertest.cc
@@ -17,15 +17,14 @@
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/sys_byteorder.h"
-#include "base/task/post_task.h"
#include "base/threading/thread_task_runner_handle.h"
#include "build/build_config.h"
-#include "content/browser/speech/proto/google_streaming_api.pb.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/browser/speech/speech_recognition_manager_impl.h"
#include "content/browser/speech/speech_recognizer_impl.h"
#include "content/public/browser/browser_task_traits.h"
#include "content/public/browser/browser_thread.h"
+#include "content/public/browser/google_streaming_api.pb.h"
#include "content/public/browser/notification_types.h"
#include "content/public/browser/web_contents.h"
#include "content/public/test/browser_test.h"
@@ -61,8 +60,8 @@ class MockAudioSystem : public media::AudioSystem {
// Posting callback to allow current SpeechRecognizerImpl dispatching event
// to complete before transitioning to the next FSM state.
- base::PostTask(
- FROM_HERE, {content::BrowserThread::IO},
+ content::GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(std::move(on_params_cb),
media::AudioParameters::UnavailableDeviceParams()));
}
@@ -230,8 +229,8 @@ class SpeechRecognitionBrowserTest : public ContentBrowserTest {
// AudioCaptureSourcer::Stop() again.
SpeechRecognizerImpl::SetAudioEnvironmentForTesting(nullptr, nullptr);
- base::PostTask(FROM_HERE, {content::BrowserThread::UI},
- base::BindOnce(&SpeechRecognitionBrowserTest::SendResponse,
+ content::GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognitionBrowserTest::SendResponse,
base::Unretained(this)));
}
diff --git a/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc b/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc
index 443516d9a70..155fc32d1ca 100644
--- a/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc
+++ b/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc
@@ -9,7 +9,6 @@
#include "base/bind.h"
#include "base/command_line.h"
#include "base/lazy_instance.h"
-#include "base/task/post_task.h"
#include "content/browser/browser_plugin/browser_plugin_guest.h"
#include "content/browser/frame_host/frame_tree_node.h"
#include "content/browser/frame_host/render_frame_host_manager.h"
@@ -62,8 +61,8 @@ void SpeechRecognitionDispatcherHost::Start(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
- base::PostTask(
- FROM_HERE, {BrowserThread::UI},
+ GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(&SpeechRecognitionDispatcherHost::StartRequestOnUI,
AsWeakPtr(), render_process_id_, render_frame_id_,
std::move(params)));
@@ -89,6 +88,14 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI(
WebContentsImpl* web_contents =
static_cast<WebContentsImpl*>(WebContents::FromRenderFrameHost(rfh));
+ // Disable BackForwardCache when using the SpeechRecognition feature, because
+ // currently we do not handle speech recognition after placing the page in
+ // BackForwardCache.
+ // TODO(sreejakshetty): Make SpeechRecognition compatible with
+ // BackForwardCache.
+ rfh->OnSchedulerTrackedFeatureUsed(
+ blink::scheduler::WebSchedulerTrackedFeature::kSpeechRecognizer);
+
// If the speech API request was from an inner WebContents or a guest, save
// the context of the outer WebContents or the embedder since we will use it
// to decide permission.
@@ -126,8 +133,8 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI(
StoragePartition* storage_partition = BrowserContext::GetStoragePartition(
browser_context, web_contents->GetSiteInstance());
- base::PostTask(
- FROM_HERE, {BrowserThread::IO},
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(
&SpeechRecognitionDispatcherHost::StartSessionOnIO,
speech_recognition_dispatcher_host, std::move(params),
diff --git a/chromium/content/browser/speech/speech_recognition_engine.cc b/chromium/content/browser/speech/speech_recognition_engine.cc
index e55a430e581..c7af8469cee 100644
--- a/chromium/content/browser/speech/speech_recognition_engine.cc
+++ b/chromium/content/browser/speech/speech_recognition_engine.cc
@@ -15,7 +15,7 @@
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "content/browser/speech/audio_buffer.h"
-#include "content/browser/speech/proto/google_streaming_api.pb.h"
+#include "content/public/browser/google_streaming_api.pb.h"
#include "google_apis/google_api_keys.h"
#include "mojo/public/c/system/types.h"
#include "mojo/public/cpp/bindings/receiver_set.h"
@@ -23,8 +23,6 @@
#include "net/base/load_flags.h"
#include "net/traffic_annotation/network_traffic_annotation.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
-#include "services/network/public/cpp/simple_url_loader.h"
-#include "services/network/public/mojom/chunked_data_pipe_getter.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
@@ -42,9 +40,6 @@ const char* web_service_base_url_for_tests = nullptr;
// This matches the maximum maxAlternatives value supported by the server.
const uint32_t kMaxMaxAlternatives = 30;
-// Maximum amount of data written per Mojo write.
-const uint32_t kMaxUploadWrite = 128 * 1024;
-
// TODO(hans): Remove this and other logging when we don't need it anymore.
void DumpResponse(const std::string& response) {
DVLOG(1) << "------------";
@@ -81,198 +76,6 @@ const uint32_t kDefaultMaxHypotheses = 1;
} // namespace
-// Streams sound data up to the server.
-class SpeechRecognitionEngine::UpstreamLoader
- : public network::mojom::ChunkedDataPipeGetter {
- public:
- UpstreamLoader(std::unique_ptr<network::ResourceRequest> resource_request,
- net::NetworkTrafficAnnotationTag upstream_traffic_annotation,
- network::mojom::URLLoaderFactory* url_loader_factory,
- SpeechRecognitionEngine* speech_recognition_engine)
- : speech_recognition_engine_(speech_recognition_engine) {
- // Attach a chunked upload body.
- mojo::PendingRemote<network::mojom::ChunkedDataPipeGetter> data_remote;
- receiver_set_.Add(this, data_remote.InitWithNewPipeAndPassReceiver());
- resource_request->request_body = new network::ResourceRequestBody();
- resource_request->request_body->SetToChunkedDataPipe(
- std::move(data_remote));
- simple_url_loader_ = network::SimpleURLLoader::Create(
- std::move(resource_request), upstream_traffic_annotation);
- simple_url_loader_->DownloadToStringOfUnboundedSizeUntilCrashAndDie(
- url_loader_factory,
- base::BindOnce(&UpstreamLoader::OnComplete, base::Unretained(this)));
- }
-
- ~UpstreamLoader() override = default;
-
- void OnComplete(std::unique_ptr<std::string> response_body) {
- int response_code = -1;
- if (simple_url_loader_->ResponseInfo() &&
- simple_url_loader_->ResponseInfo()->headers) {
- response_code =
- simple_url_loader_->ResponseInfo()->headers->response_code();
- }
- speech_recognition_engine_->OnUpstreamDataComplete(response_body != nullptr,
- response_code);
- }
-
- void AppendChunkToUpload(const std::string& data, bool is_last_chunk) {
- DCHECK(!has_last_chunk_);
-
- upload_body_ += data;
- if (is_last_chunk) {
- // Send size before the rest of the body. While it doesn't matter much, if
- // the other side receives the size before the last chunk, which Mojo does
- // not gaurantee, some protocols can merge the data and the last chunk
- // itself into a single frame.
- has_last_chunk_ = is_last_chunk;
- if (get_size_callback_)
- std::move(get_size_callback_).Run(net::OK, upload_body_.size());
- }
-
- SendData();
- }
-
- private:
- void OnUploadPipeWriteable(MojoResult unused) { SendData(); }
-
- // Attempts to send more of the upload body, if more data is available, and
- // |upload_pipe_| is valid.
- void SendData() {
- DCHECK_LE(upload_position_, upload_body_.size());
-
- if (!upload_pipe_.is_valid())
- return;
-
- // Nothing more to write yet, or done writing everything.
- if (upload_position_ == upload_body_.size())
- return;
-
- // Since kMaxUploadWrite is a uint32_t, no overflow occurs in this downcast.
- uint32_t write_bytes = std::min(upload_body_.length() - upload_position_,
- static_cast<size_t>(kMaxUploadWrite));
- MojoResult result =
- upload_pipe_->WriteData(upload_body_.data() + upload_position_,
- &write_bytes, MOJO_WRITE_DATA_FLAG_NONE);
-
- // Wait for the pipe to have more capacity available, if needed.
- if (result == MOJO_RESULT_SHOULD_WAIT) {
- upload_pipe_watcher_->ArmOrNotify();
- return;
- }
-
- // Do nothing on pipe closure - depend on the SimpleURLLoader to notice the
- // other pipes being closed on error. Can reach this point if there's a
- // retry, for instance, so cannot draw any conclusions here.
- if (result != MOJO_RESULT_OK)
- return;
-
- upload_position_ += write_bytes;
- // If more data is available, arm the watcher again. Don't write again in a
- // loop, even if WriteData would allow it, to avoid blocking the current
- // thread.
- if (upload_position_ < upload_body_.size())
- upload_pipe_watcher_->ArmOrNotify();
- }
-
- // mojom::ChunkedDataPipeGetter implementation:
-
- void GetSize(GetSizeCallback get_size_callback) override {
- if (has_last_chunk_) {
- std::move(get_size_callback).Run(net::OK, upload_body_.size());
- } else {
- get_size_callback_ = std::move(get_size_callback);
- }
- }
-
- void StartReading(mojo::ScopedDataPipeProducerHandle pipe) override {
- // Delete any existing pipe, if any.
- upload_pipe_watcher_.reset();
- upload_pipe_ = std::move(pipe);
- upload_pipe_watcher_ = std::make_unique<mojo::SimpleWatcher>(
- FROM_HERE, mojo::SimpleWatcher::ArmingPolicy::MANUAL);
- upload_pipe_watcher_->Watch(
- upload_pipe_.get(), MOJO_HANDLE_SIGNAL_WRITABLE,
- base::BindRepeating(&UpstreamLoader::OnUploadPipeWriteable,
- base::Unretained(this)));
- upload_position_ = 0;
-
- // Will attempt to start sending the request body, if any data is available.
- SendData();
- }
-
- // Partial upload body. Have to cache the entire thing in memory, in case have
- // to replay it.
- std::string upload_body_;
- // Current position in |upload_body_|. All bytes before this point have been
- // written to |upload_pipe_|.
- size_t upload_position_ = 0;
- // Whether |upload_body_| is complete.
- bool has_last_chunk_ = false;
-
- // Current pipe being used to send the |upload_body_| to the URLLoader.
- mojo::ScopedDataPipeProducerHandle upload_pipe_;
- // Watches |upload_pipe_| for writeability.
- std::unique_ptr<mojo::SimpleWatcher> upload_pipe_watcher_;
-
- // If non-null, invoked once the size of the upload is known.
- network::mojom::ChunkedDataPipeGetter::GetSizeCallback get_size_callback_;
-
- SpeechRecognitionEngine* const speech_recognition_engine_;
- std::unique_ptr<network::SimpleURLLoader> simple_url_loader_;
- mojo::ReceiverSet<network::mojom::ChunkedDataPipeGetter> receiver_set_;
-
- DISALLOW_COPY_AND_ASSIGN(UpstreamLoader);
-};
-
-// Streams response data from the server to the SpeechRecognitionEngine.
-class SpeechRecognitionEngine::DownstreamLoader
- : public network::SimpleURLLoaderStreamConsumer {
- public:
- DownstreamLoader(std::unique_ptr<network::ResourceRequest> resource_request,
- net::NetworkTrafficAnnotationTag upstream_traffic_annotation,
- network::mojom::URLLoaderFactory* url_loader_factory,
- SpeechRecognitionEngine* speech_recognition_engine)
- : speech_recognition_engine_(speech_recognition_engine) {
- simple_url_loader_ = network::SimpleURLLoader::Create(
- std::move(resource_request), upstream_traffic_annotation);
- simple_url_loader_->DownloadAsStream(url_loader_factory, this);
- }
-
- ~DownstreamLoader() override = default;
-
- // SimpleURLLoaderStreamConsumer implementation:
-
- void OnDataReceived(base::StringPiece string_piece,
- base::OnceClosure resume) override {
- speech_recognition_engine_->OnDownstreamDataReceived(string_piece);
- std::move(resume).Run();
- }
-
- void OnComplete(bool success) override {
- int response_code = -1;
- if (simple_url_loader_->ResponseInfo() &&
- simple_url_loader_->ResponseInfo()->headers) {
- response_code =
- simple_url_loader_->ResponseInfo()->headers->response_code();
- }
-
- speech_recognition_engine_->OnDownstreamDataComplete(success,
- response_code);
- }
-
- void OnRetry(base::OnceClosure start_retry) override {
- // Retries are not enabled for these requests.
- NOTREACHED();
- }
-
- private:
- SpeechRecognitionEngine* const speech_recognition_engine_;
- std::unique_ptr<network::SimpleURLLoader> simple_url_loader_;
-
- DISALLOW_COPY_AND_ASSIGN(DownstreamLoader);
-};
-
SpeechRecognitionEngine::Config::Config()
: filter_profanities(false),
continuous(true),
@@ -562,7 +365,7 @@ SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) {
auto downstream_request = std::make_unique<network::ResourceRequest>();
downstream_request->credentials_mode = network::mojom::CredentialsMode::kOmit;
downstream_request->url = downstream_url;
- downstream_loader_ = std::make_unique<DownstreamLoader>(
+ downstream_loader_ = std::make_unique<speech::DownstreamLoader>(
std::move(downstream_request), downstream_traffic_annotation,
shared_url_loader_factory_.get(), this);
@@ -667,7 +470,7 @@ SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) {
encoder_->GetMimeType());
}
- upstream_loader_ = std::make_unique<UpstreamLoader>(
+ upstream_loader_ = std::make_unique<speech::UpstreamLoader>(
std::move(upstream_request), upstream_traffic_annotation,
shared_url_loader_factory_.get(), this);
diff --git a/chromium/content/browser/speech/speech_recognition_engine.h b/chromium/content/browser/speech/speech_recognition_engine.h
index 1f3501200e5..d1e99750312 100644
--- a/chromium/content/browser/speech/speech_recognition_engine.h
+++ b/chromium/content/browser/speech/speech_recognition_engine.h
@@ -14,6 +14,10 @@
#include "base/memory/ref_counted.h"
#include "base/sequence_checker.h"
#include "base/strings/string_piece.h"
+#include "components/speech/downstream_loader.h"
+#include "components/speech/downstream_loader_client.h"
+#include "components/speech/upstream_loader.h"
+#include "components/speech/upstream_loader_client.h"
#include "content/browser/speech/audio_encoder.h"
#include "content/browser/speech/chunked_byte_buffer.h"
#include "content/common/content_export.h"
@@ -59,7 +63,9 @@ struct SpeechRecognitionError;
// EndRecognition. If a recognition was started, the caller can free the
// SpeechRecognitionEngine only after calling EndRecognition.
-class CONTENT_EXPORT SpeechRecognitionEngine {
+class CONTENT_EXPORT SpeechRecognitionEngine
+ : public speech::UpstreamLoaderClient,
+ public speech::DownstreamLoaderClient {
public:
class Delegate {
public:
@@ -104,7 +110,7 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
SpeechRecognitionEngine(
scoped_refptr<network::SharedURLLoaderFactory> shared_url_loader_factory,
const std::string& accept_language);
- ~SpeechRecognitionEngine();
+ ~SpeechRecognitionEngine() override;
// Sets the URL requests are sent to for tests.
static void set_web_service_base_url_for_tests(
@@ -119,8 +125,8 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
int GetDesiredAudioChunkDurationMs() const;
private:
- class UpstreamLoader;
- class DownstreamLoader;
+ friend class speech::UpstreamLoaderClient;
+ friend class speech::DownstreamLoader;
Delegate* delegate_;
@@ -171,10 +177,12 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
DISALLOW_COPY_AND_ASSIGN(FSMEventArgs);
};
- void OnUpstreamDataComplete(bool success, int response_code);
+ // speech::UpstreamLoaderClient
+ void OnUpstreamDataComplete(bool success, int response_code) override;
- void OnDownstreamDataReceived(base::StringPiece new_response_data);
- void OnDownstreamDataComplete(bool success, int response_code);
+ // speech::DownstreamLoaderClient
+ void OnDownstreamDataReceived(base::StringPiece new_response_data) override;
+ void OnDownstreamDataComplete(bool success, int response_code) override;
// Entry point for pushing any new external event into the recognizer FSM.
void DispatchEvent(const FSMEventArgs& event_args);
@@ -204,8 +212,8 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
void UploadAudioChunk(const std::string& data, FrameType type, bool is_final);
Config config_;
- std::unique_ptr<UpstreamLoader> upstream_loader_;
- std::unique_ptr<DownstreamLoader> downstream_loader_;
+ std::unique_ptr<speech::UpstreamLoader> upstream_loader_;
+ std::unique_ptr<speech::DownstreamLoader> downstream_loader_;
scoped_refptr<network::SharedURLLoaderFactory> shared_url_loader_factory_;
const std::string accept_language_;
std::unique_ptr<AudioEncoder> encoder_;
diff --git a/chromium/content/browser/speech/speech_recognition_engine_unittest.cc b/chromium/content/browser/speech/speech_recognition_engine_unittest.cc
index 1312af4f6f6..a68c4165ff5 100644
--- a/chromium/content/browser/speech/speech_recognition_engine_unittest.cc
+++ b/chromium/content/browser/speech/speech_recognition_engine_unittest.cc
@@ -17,7 +17,7 @@
#include "base/sys_byteorder.h"
#include "base/test/task_environment.h"
#include "content/browser/speech/audio_buffer.h"
-#include "content/browser/speech/proto/google_streaming_api.pb.h"
+#include "content/public/browser/google_streaming_api.pb.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"
diff --git a/chromium/content/browser/speech/speech_recognition_manager_impl.cc b/chromium/content/browser/speech/speech_recognition_manager_impl.cc
index 9a55a7c6e80..4220ef86ec3 100644
--- a/chromium/content/browser/speech/speech_recognition_manager_impl.cc
+++ b/chromium/content/browser/speech/speech_recognition_manager_impl.cc
@@ -15,7 +15,6 @@
#include "base/memory/ref_counted_delete_on_sequence.h"
#include "base/sequenced_task_runner.h"
#include "base/single_thread_task_runner.h"
-#include "base/task/post_task.h"
#include "base/threading/thread_task_runner_handle.h"
#include "build/build_config.h"
#include "content/browser/browser_main_loop.h"
@@ -196,10 +195,9 @@ void SpeechRecognitionManagerImpl::FrameDeletionObserver::ContentsObserver::
RenderFrameDeleted(RenderFrameHost* render_frame_host) {
auto iters = observed_frames_.equal_range(render_frame_host);
for (auto it = iters.first; it != iters.second; ++it) {
- base::CreateSingleThreadTaskRunner({BrowserThread::IO})
- ->PostTask(FROM_HERE,
- base::BindOnce(parent_observer_->frame_deleted_callback_,
- it->second));
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE,
+ base::BindOnce(parent_observer_->frame_deleted_callback_, it->second));
}
observed_frames_.erase(iters.first, iters.second);
@@ -303,14 +301,13 @@ int SpeechRecognitionManagerImpl::CreateSession(
// The deletion observer is owned by this class, so it's safe to use
// Unretained.
- base::CreateSingleThreadTaskRunner({BrowserThread::UI})
- ->PostTask(
- FROM_HERE,
- base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver::
- CreateObserverForSession,
- base::Unretained(frame_deletion_observer_.get()),
- config.initial_context.render_process_id,
- config.initial_context.render_frame_id, session_id));
+ GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE,
+ base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver::
+ CreateObserverForSession,
+ base::Unretained(frame_deletion_observer_.get()),
+ config.initial_context.render_process_id,
+ config.initial_context.render_frame_id, session_id));
return session_id;
}
@@ -413,15 +410,14 @@ void SpeechRecognitionManagerImpl::AbortSession(int session_id) {
// The deletion observer is owned by this class, so it's safe to use
// Unretained.
- base::CreateSingleThreadTaskRunner({BrowserThread::UI})
- ->PostTask(
- FROM_HERE,
- base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver::
- RemoveObserverForSession,
- base::Unretained(frame_deletion_observer_.get()),
- iter->second->config.initial_context.render_process_id,
- iter->second->config.initial_context.render_frame_id,
- session_id));
+ GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE,
+ base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver::
+ RemoveObserverForSession,
+ base::Unretained(frame_deletion_observer_.get()),
+ iter->second->config.initial_context.render_process_id,
+ iter->second->config.initial_context.render_frame_id,
+ session_id));
AbortSessionImpl(session_id);
}
diff --git a/chromium/content/browser/speech/speech_recognizer.h b/chromium/content/browser/speech/speech_recognizer.h
index 64c896518a2..7a5f5204eee 100644
--- a/chromium/content/browser/speech/speech_recognizer.h
+++ b/chromium/content/browser/speech/speech_recognizer.h
@@ -5,7 +5,7 @@
#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
-#include "base/logging.h"
+#include "base/check.h"
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "content/common/content_export.h"
diff --git a/chromium/content/browser/speech/speech_recognizer_impl.cc b/chromium/content/browser/speech/speech_recognizer_impl.cc
index b03a554f559..c17b704614b 100644
--- a/chromium/content/browser/speech/speech_recognizer_impl.cc
+++ b/chromium/content/browser/speech/speech_recognizer_impl.cc
@@ -11,7 +11,6 @@
#include "base/bind.h"
#include "base/macros.h"
#include "base/numerics/ranges.h"
-#include "base/task/post_task.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "content/browser/browser_main_loop.h"
@@ -223,22 +222,22 @@ void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {
DCHECK(!device_id.empty());
device_id_ = device_id;
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
weak_ptr_factory_.GetWeakPtr(),
FSMEventArgs(EVENT_PREPARE)));
}
void SpeechRecognizerImpl::AbortRecognition() {
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
weak_ptr_factory_.GetWeakPtr(),
FSMEventArgs(EVENT_ABORT)));
}
void SpeechRecognizerImpl::StopAudioCapture() {
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
weak_ptr_factory_.GetWeakPtr(),
FSMEventArgs(EVENT_STOP_CAPTURE)));
}
@@ -278,15 +277,15 @@ void SpeechRecognizerImpl::Capture(const AudioBus* data,
// Convert audio from native format to fixed format used by WebSpeech.
FSMEventArgs event_args(EVENT_AUDIO_DATA);
event_args.audio_data = audio_converter_->Convert(data);
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
weak_ptr_factory_.GetWeakPtr(), event_args));
// See http://crbug.com/506051 regarding why one extra convert call can
// sometimes be required. It should be a rare case.
if (!audio_converter_->data_was_converted()) {
event_args.audio_data = audio_converter_->Convert(data);
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
weak_ptr_factory_.GetWeakPtr(), event_args));
}
// Something is seriously wrong here and we are most likely missing some
@@ -296,8 +295,8 @@ void SpeechRecognizerImpl::Capture(const AudioBus* data,
void SpeechRecognizerImpl::OnCaptureError(const std::string& message) {
FSMEventArgs event_args(EVENT_AUDIO_ERROR);
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
weak_ptr_factory_.GetWeakPtr(), event_args));
}
@@ -305,8 +304,8 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
FSMEventArgs event_args(EVENT_ENGINE_RESULT);
event_args.engine_results = mojo::Clone(results);
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
weak_ptr_factory_.GetWeakPtr(), event_args));
}
@@ -319,8 +318,8 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) {
FSMEventArgs event_args(EVENT_ENGINE_ERROR);
event_args.engine_error = error;
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent,
weak_ptr_factory_.GetWeakPtr(), event_args));
}
@@ -880,6 +879,7 @@ void SpeechRecognizerImpl::CreateAudioCapturerSource() {
stream_factory.InitWithNewPipeAndPassReceiver());
audio_capturer_source_ = audio::CreateInputDevice(
std::move(stream_factory), device_id_,
+ audio::DeadStreamDetection::kEnabled,
MediaInternals::GetInstance()->CreateMojoAudioLog(
media::AudioLogFactory::AUDIO_INPUT_CONTROLLER,
0 /* component_id */));
diff --git a/chromium/content/browser/speech/speech_recognizer_impl_android.cc b/chromium/content/browser/speech/speech_recognizer_impl_android.cc
index c72dd8464c2..49ff8496d40 100644
--- a/chromium/content/browser/speech/speech_recognizer_impl_android.cc
+++ b/chromium/content/browser/speech/speech_recognizer_impl_android.cc
@@ -12,7 +12,6 @@
#include "base/android/scoped_java_ref.h"
#include "base/bind.h"
#include "base/strings/utf_string_conversions.h"
-#include "base/task/post_task.h"
#include "content/public/android/content_jni_headers/SpeechRecognitionImpl_jni.h"
#include "content/public/browser/browser_task_traits.h"
#include "content/public/browser/browser_thread.h"
@@ -42,14 +41,14 @@ void SpeechRecognizerImplAndroid::StartRecognition(
const std::string& device_id) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
// TODO(xians): Open the correct device for speech on Android.
- base::PostTask(
- FROM_HERE, {BrowserThread::IO},
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(&SpeechRecognitionEventListener::OnRecognitionStart,
base::Unretained(listener()), session_id()));
SpeechRecognitionSessionConfig config =
SpeechRecognitionManager::GetInstance()->GetSessionConfig(session_id());
- base::PostTask(
- FROM_HERE, {BrowserThread::UI},
+ GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(
&content::SpeechRecognizerImplAndroid::StartRecognitionOnUIThread,
this, config.language, config.continuous, config.interim_results));
@@ -71,8 +70,8 @@ void SpeechRecognizerImplAndroid::StartRecognitionOnUIThread(
void SpeechRecognizerImplAndroid::AbortRecognition() {
if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
state_ = STATE_IDLE;
- base::PostTask(
- FROM_HERE, {BrowserThread::UI},
+ GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(&content::SpeechRecognizerImplAndroid::AbortRecognition,
this));
return;
@@ -85,8 +84,8 @@ void SpeechRecognizerImplAndroid::AbortRecognition() {
void SpeechRecognizerImplAndroid::StopAudioCapture() {
if (BrowserThread::CurrentlyOn(BrowserThread::IO)) {
- base::PostTask(
- FROM_HERE, {BrowserThread::UI},
+ GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(&content::SpeechRecognizerImplAndroid::StopAudioCapture,
this));
return;
@@ -111,8 +110,8 @@ void SpeechRecognizerImplAndroid::OnAudioStart(
JNIEnv* env,
const JavaParamRef<jobject>& obj) {
if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioStart,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioStart,
this, nullptr, nullptr));
return;
}
@@ -125,8 +124,8 @@ void SpeechRecognizerImplAndroid::OnSoundStart(
JNIEnv* env,
const JavaParamRef<jobject>& obj) {
if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundStart,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundStart,
this, nullptr, nullptr));
return;
}
@@ -137,8 +136,8 @@ void SpeechRecognizerImplAndroid::OnSoundStart(
void SpeechRecognizerImplAndroid::OnSoundEnd(JNIEnv* env,
const JavaParamRef<jobject>& obj) {
if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundEnd,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundEnd,
this, nullptr, nullptr));
return;
}
@@ -149,8 +148,8 @@ void SpeechRecognizerImplAndroid::OnSoundEnd(JNIEnv* env,
void SpeechRecognizerImplAndroid::OnAudioEnd(JNIEnv* env,
const JavaParamRef<jobject>& obj) {
if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
- base::PostTask(FROM_HERE, {BrowserThread::IO},
- base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioEnd,
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioEnd,
this, nullptr, nullptr));
return;
}
@@ -181,8 +180,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionResults(
options[i], static_cast<double>(scores[i])));
}
result->is_provisional = provisional;
- base::PostTask(
- FROM_HERE, {BrowserThread::IO},
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(
&SpeechRecognizerImplAndroid::OnRecognitionResultsOnIOThread, this,
std::move(results)));
@@ -199,8 +198,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionError(
const JavaParamRef<jobject>& obj,
jint error) {
if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
- base::PostTask(
- FROM_HERE, {BrowserThread::IO},
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(&SpeechRecognizerImplAndroid::OnRecognitionError, this,
nullptr, nullptr, error));
return;
@@ -217,8 +216,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionEnd(
JNIEnv* env,
const JavaParamRef<jobject>& obj) {
if (BrowserThread::CurrentlyOn(BrowserThread::UI)) {
- base::PostTask(
- FROM_HERE, {BrowserThread::IO},
+ GetIOThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(&SpeechRecognizerImplAndroid::OnRecognitionEnd, this,
nullptr, nullptr));
return;
diff --git a/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc b/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc
index 7645df8014d..a8369b26471 100644
--- a/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc
+++ b/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc
@@ -18,9 +18,9 @@
#include "base/test/scoped_feature_list.h"
#include "base/threading/thread.h"
#include "base/threading/thread_task_runner_handle.h"
-#include "content/browser/speech/proto/google_streaming_api.pb.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/browser/speech/speech_recognizer_impl.h"
+#include "content/public/browser/google_streaming_api.pb.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/common/content_features.h"
#include "content/public/test/browser_task_environment.h"
diff --git a/chromium/content/browser/speech/speech_synthesis_impl.cc b/chromium/content/browser/speech/speech_synthesis_impl.cc
index 1ab5ad01656..e46103bcb66 100644
--- a/chromium/content/browser/speech/speech_synthesis_impl.cc
+++ b/chromium/content/browser/speech/speech_synthesis_impl.cc
@@ -4,6 +4,8 @@
#include "content/browser/speech/speech_synthesis_impl.h"
+#include "content/browser/speech/tts_utterance_impl.h"
+
namespace content {
namespace {
@@ -85,9 +87,11 @@ void SendVoiceListToObserver(
} // namespace
-SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context)
- : browser_context_(browser_context) {
+SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context,
+ WebContents* web_contents)
+ : browser_context_(browser_context), web_contents_(web_contents) {
DCHECK(browser_context_);
+ DCHECK(web_contents_);
TtsController::GetInstance()->AddVoicesChangedDelegate(this);
}
@@ -120,8 +124,8 @@ void SpeechSynthesisImpl::AddVoiceListObserver(
void SpeechSynthesisImpl::Speak(
blink::mojom::SpeechSynthesisUtterancePtr utterance,
mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client) {
- std::unique_ptr<TtsUtterance> tts_utterance(
- TtsUtterance::Create((browser_context_)));
+ std::unique_ptr<TtsUtterance> tts_utterance =
+ std::make_unique<TtsUtteranceImpl>(browser_context_, web_contents_);
tts_utterance->SetText(utterance->text);
tts_utterance->SetLang(utterance->lang);
tts_utterance->SetVoiceName(utterance->voice);
diff --git a/chromium/content/browser/speech/speech_synthesis_impl.h b/chromium/content/browser/speech/speech_synthesis_impl.h
index 7db29e521cb..96cdacf46eb 100644
--- a/chromium/content/browser/speech/speech_synthesis_impl.h
+++ b/chromium/content/browser/speech/speech_synthesis_impl.h
@@ -12,6 +12,7 @@
namespace content {
class BrowserContext;
+class WebContents;
// Back-end for the web speech synthesis API; dispatches speech requests to
// content::TtsController and forwards voice lists and events back to the
@@ -19,7 +20,8 @@ class BrowserContext;
class SpeechSynthesisImpl : public blink::mojom::SpeechSynthesis,
public VoicesChangedDelegate {
public:
- explicit SpeechSynthesisImpl(BrowserContext* browser_context);
+ SpeechSynthesisImpl(BrowserContext* browser_context,
+ WebContents* web_contents);
~SpeechSynthesisImpl() override;
SpeechSynthesisImpl(const SpeechSynthesisImpl&) = delete;
@@ -44,6 +46,8 @@ class SpeechSynthesisImpl : public blink::mojom::SpeechSynthesis,
private:
BrowserContext* browser_context_;
+ WebContents* web_contents_;
+
mojo::ReceiverSet<blink::mojom::SpeechSynthesis> receiver_set_;
mojo::RemoteSet<blink::mojom::SpeechSynthesisVoiceListObserver> observer_set_;
};
diff --git a/chromium/content/browser/speech/tts_controller_impl.cc b/chromium/content/browser/speech/tts_controller_impl.cc
index 108a5a7d0a6..e34045ca4e1 100644
--- a/chromium/content/browser/speech/tts_controller_impl.cc
+++ b/chromium/content/browser/speech/tts_controller_impl.cc
@@ -6,6 +6,7 @@
#include <stddef.h>
+#include <algorithm>
#include <string>
#include <vector>
@@ -16,20 +17,47 @@
#include "base/metrics/user_metrics.h"
#include "base/values.h"
#include "build/build_config.h"
+#include "content/browser/speech/tts_utterance_impl.h"
#include "content/public/browser/content_browser_client.h"
+#include "content/public/browser/visibility.h"
+#include "content/public/browser/web_contents.h"
#include "content/public/common/content_client.h"
#include "services/data_decoder/public/cpp/safe_xml_parser.h"
#include "services/data_decoder/public/mojom/xml_parser.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
+#include "ui/base/l10n/l10n_util.h"
-namespace content {
+#if defined(OS_CHROMEOS)
+#include "content/public/browser/tts_controller_delegate.h"
+#endif
+namespace content {
+namespace {
// A value to be used to indicate that there is no char index available.
const int kInvalidCharIndex = -1;
// A value to be used to indicate that there is no length available.
const int kInvalidLength = -1;
+#if defined(OS_CHROMEOS)
+bool VoiceIdMatches(
+ const base::Optional<TtsControllerDelegate::PreferredVoiceId>& id,
+ const content::VoiceData& voice) {  // Returns true if the preferred voice |id| identifies |voice|.
+ if (!id.has_value() || voice.name.empty() ||
+ (voice.engine_id.empty() && !voice.native))
+ return false;  // No preference, an unnamed voice, or a non-native voice without an engine id.
+ if (voice.native)
+ return id->name == voice.name && id->id.empty();  // Native voice: names match and the preferred id has an empty |id|.
+ return id->name == voice.name && id->id == voice.engine_id;  // Engine voice: both name and engine id must match.
+}
+#endif // defined(OS_CHROMEOS)
+
+TtsUtteranceImpl* AsUtteranceImpl(TtsUtterance* utterance) {  // Downcasts |utterance| to the concrete implementation type.
+ return static_cast<TtsUtteranceImpl*>(utterance);  // Unchecked cast; presumably every TtsUtterance is a TtsUtteranceImpl - TODO confirm.
+}
+
+} // namespace
+
//
// VoiceData
//
@@ -77,16 +105,12 @@ TtsControllerImpl* TtsControllerImpl::GetInstance() {
return base::Singleton<TtsControllerImpl>::get();
}
-TtsControllerImpl::TtsControllerImpl()
- : delegate_(nullptr),
- current_utterance_(nullptr),
- paused_(false),
- tts_platform_(nullptr) {}
+TtsControllerImpl::TtsControllerImpl() = default;
TtsControllerImpl::~TtsControllerImpl() {
if (current_utterance_) {
current_utterance_->Finish();
- current_utterance_.reset();
+ SetCurrentUtterance(nullptr);
}
// Clear any queued utterances too.
@@ -95,17 +119,22 @@ TtsControllerImpl::~TtsControllerImpl() {
void TtsControllerImpl::SpeakOrEnqueue(
std::unique_ptr<TtsUtterance> utterance) {
+ if (!ShouldSpeakUtterance(utterance.get())) {
+ utterance->Finish();
+ return;
+ }
+
// If we're paused and we get an utterance that can't be queued,
// flush the queue but stay in the paused state.
if (paused_ && !utterance->GetCanEnqueue()) {
- utterance_deque_.emplace_back(std::move(utterance));
+ utterance_list_.emplace_back(std::move(utterance));
Stop();
paused_ = true;
return;
}
if (paused_ || (IsSpeaking() && utterance->GetCanEnqueue())) {
- utterance_deque_.emplace_back(std::move(utterance));
+ utterance_list_.emplace_back(std::move(utterance));
} else {
Stop();
SpeakNow(std::move(utterance));
@@ -113,26 +142,30 @@ void TtsControllerImpl::SpeakOrEnqueue(
}
void TtsControllerImpl::Stop() {
- StopInternal(GURL());
+ StopAndClearQueue(GURL());
}
void TtsControllerImpl::Stop(const GURL& source_url) {
- StopInternal(source_url);
+ StopAndClearQueue(source_url);
+}
+
+void TtsControllerImpl::StopAndClearQueue(const GURL& source_url) {  // Stops the current utterance and, only if that stop happened, clears the queue.
+ if (StopCurrentUtteranceIfMatches(source_url))  // False when the current utterance's origin differs from |source_url|; the queue is then left untouched.
+ ClearUtteranceQueue(true);  // true: send TTS_EVENT_CANCELLED for each queued utterance.
+}
-void TtsControllerImpl::StopInternal(const GURL& source_url) {
+bool TtsControllerImpl::StopCurrentUtteranceIfMatches(const GURL& source_url) {
base::RecordAction(base::UserMetricsAction("TextToSpeech.Stop"));
paused_ = false;
if (!source_url.is_empty() && current_utterance_ &&
current_utterance_->GetSrcUrl().GetOrigin() != source_url.GetOrigin())
- return;
+ return false;
if (current_utterance_ && !current_utterance_->GetEngineId().empty()) {
- if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
- GetTtsControllerDelegate()->GetTtsEngineDelegate()->Stop(
- current_utterance_.get());
+ if (engine_delegate_)
+ engine_delegate_->Stop(current_utterance_.get());
} else {
GetTtsPlatform()->ClearError();
GetTtsPlatform()->StopSpeaking();
@@ -142,7 +175,7 @@ void TtsControllerImpl::StopInternal(const GURL& source_url) {
current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
kInvalidLength, std::string());
FinishCurrentUtterance();
- ClearUtteranceQueue(true); // Send events.
+ return true;
}
void TtsControllerImpl::Pause() {
@@ -150,9 +183,8 @@ void TtsControllerImpl::Pause() {
paused_ = true;
if (current_utterance_ && !current_utterance_->GetEngineId().empty()) {
- if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
- GetTtsControllerDelegate()->GetTtsEngineDelegate()->Pause(
- current_utterance_.get());
+ if (engine_delegate_)
+ engine_delegate_->Pause(current_utterance_.get());
} else if (current_utterance_) {
GetTtsPlatform()->ClearError();
GetTtsPlatform()->Pause();
@@ -164,9 +196,8 @@ void TtsControllerImpl::Resume() {
paused_ = false;
if (current_utterance_ && !current_utterance_->GetEngineId().empty()) {
- if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
- GetTtsControllerDelegate()->GetTtsEngineDelegate()->Resume(
- current_utterance_.get());
+ if (engine_delegate_)
+ engine_delegate_->Resume(current_utterance_.get());
} else if (current_utterance_) {
GetTtsPlatform()->ClearError();
GetTtsPlatform()->Resume();
@@ -245,11 +276,8 @@ void TtsControllerImpl::GetVoices(BrowserContext* browser_context,
tts_platform->GetVoices(out_voices);
}
- if (browser_context) {
- TtsControllerDelegate* delegate = GetTtsControllerDelegate();
- if (delegate && delegate->GetTtsEngineDelegate())
- delegate->GetTtsEngineDelegate()->GetVoices(browser_context, out_voices);
- }
+ if (browser_context && engine_delegate_)
+ engine_delegate_->GetVoices(browser_context, out_voices);
}
bool TtsControllerImpl::IsSpeaking() {
@@ -276,22 +304,21 @@ void TtsControllerImpl::RemoveVoicesChangedDelegate(
void TtsControllerImpl::RemoveUtteranceEventDelegate(
UtteranceEventDelegate* delegate) {
// First clear any pending utterances with this delegate.
- std::deque<std::unique_ptr<TtsUtterance>> old_deque;
- utterance_deque_.swap(old_deque);
- while (!old_deque.empty()) {
- std::unique_ptr<TtsUtterance> utterance = std::move(old_deque.front());
- old_deque.pop_front();
+ std::list<std::unique_ptr<TtsUtterance>> old_list;
+ utterance_list_.swap(old_list);
+ while (!old_list.empty()) {
+ std::unique_ptr<TtsUtterance> utterance = std::move(old_list.front());
+ old_list.pop_front();
if (utterance->GetEventDelegate() != delegate)
- utterance_deque_.emplace_back(std::move(utterance));
+ utterance_list_.emplace_back(std::move(utterance));
}
if (current_utterance_ &&
current_utterance_->GetEventDelegate() == delegate) {
current_utterance_->SetEventDelegate(nullptr);
if (!current_utterance_->GetEngineId().empty()) {
- if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
- GetTtsControllerDelegate()->GetTtsEngineDelegate()->Stop(
- current_utterance_.get());
+ if (engine_delegate_)
+ engine_delegate_->Stop(current_utterance_.get());
} else {
GetTtsPlatform()->ClearError();
GetTtsPlatform()->StopSpeaking();
@@ -304,17 +331,11 @@ void TtsControllerImpl::RemoveUtteranceEventDelegate(
}
void TtsControllerImpl::SetTtsEngineDelegate(TtsEngineDelegate* delegate) {
- if (!GetTtsControllerDelegate())
- return;
-
- GetTtsControllerDelegate()->SetTtsEngineDelegate(delegate);
+ engine_delegate_ = delegate;
}
TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
- if (!GetTtsControllerDelegate())
- return nullptr;
-
- return GetTtsControllerDelegate()->GetTtsEngineDelegate();
+ return engine_delegate_;
}
void TtsControllerImpl::OnBrowserContextDestroyed(
@@ -322,7 +343,7 @@ void TtsControllerImpl::OnBrowserContextDestroyed(
bool did_clear_utterances = false;
// First clear the BrowserContext from any utterances.
- for (std::unique_ptr<TtsUtterance>& utterance : utterance_deque_) {
+ for (std::unique_ptr<TtsUtterance>& utterance : utterance_list_) {
if (utterance->GetBrowserContext() == browser_context) {
utterance->ClearBrowserContext();
did_clear_utterances = true;
@@ -342,7 +363,7 @@ void TtsControllerImpl::OnBrowserContextDestroyed(
// safe to use base::Unretained because this is a singleton.
if (did_clear_utterances) {
base::ThreadTaskRunnerHandle::Get()->PostTask(
- FROM_HERE, base::BindOnce(&TtsControllerImpl::StopInternal,
+ FROM_HERE, base::BindOnce(&TtsControllerImpl::StopAndClearQueue,
base::Unretained(this), GURL()));
}
}
@@ -352,7 +373,7 @@ void TtsControllerImpl::SetTtsPlatform(TtsPlatform* tts_platform) {
}
int TtsControllerImpl::QueueSize() {
- return static_cast<int>(utterance_deque_.size());
+ return static_cast<int>(utterance_list_.size());
}
TtsPlatform* TtsControllerImpl::GetTtsPlatform() {
@@ -362,15 +383,6 @@ TtsPlatform* TtsControllerImpl::GetTtsPlatform() {
}
void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) {
- // Note: this would only happen if a content embedder failed to provide
- // their own TtsControllerDelegate. Chrome provides one, and Content Shell
- // provides a mock one for web tests.
- if (!GetTtsControllerDelegate()) {
- utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
- kInvalidLength, std::string());
- return;
- }
-
// Get all available voices and try to find a matching voice.
std::vector<VoiceData> voices;
GetVoices(utterance->GetBrowserContext(), &voices);
@@ -379,8 +391,7 @@ void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) {
// to true because that might trigger deferred loading of native voices.
// TODO(katie): Move most of the GetMatchingVoice logic into content/ and
// use the TTS controller delegate to get chrome-specific info as needed.
- int index =
- GetTtsControllerDelegate()->GetMatchingVoice(utterance.get(), voices);
+ int index = GetMatchingVoice(utterance.get(), voices);
VoiceData voice;
if (index >= 0)
voice = voices[index];
@@ -411,23 +422,22 @@ void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) {
if (!voice.native) {
#if !defined(OS_ANDROID)
DCHECK(!voice.engine_id.empty());
- current_utterance_ = std::move(utterance);
+ SetCurrentUtterance(std::move(utterance));
current_utterance_->SetEngineId(voice.engine_id);
- if (GetTtsControllerDelegate()->GetTtsEngineDelegate())
- GetTtsControllerDelegate()->GetTtsEngineDelegate()->Speak(
- current_utterance_.get(), voice);
+ if (engine_delegate_)
+ engine_delegate_->Speak(current_utterance_.get(), voice);
bool sends_end_event =
voice.events.find(TTS_EVENT_END) != voice.events.end();
if (!sends_end_event) {
current_utterance_->Finish();
- current_utterance_.reset();
+ SetCurrentUtterance(nullptr);
SpeakNextUtterance();
}
-#endif
+#endif // !defined(OS_ANDROID)
} else {
// It's possible for certain platforms to send start events immediately
// during |speak|.
- current_utterance_ = std::move(utterance);
+ SetCurrentUtterance(std::move(utterance));
GetTtsPlatform()->ClearError();
GetTtsPlatform()->Speak(
current_utterance_->GetId(), current_utterance_->GetText(),
@@ -451,20 +461,20 @@ void TtsControllerImpl::OnSpeakFinished(int utterance_id, bool success) {
// the browser has built-in TTS that isn't loaded yet.
if (GetTtsPlatform()->LoadBuiltInTtsEngine(
current_utterance_->GetBrowserContext())) {
- utterance_deque_.emplace_back(std::move(current_utterance_));
+ utterance_list_.emplace_back(std::move(current_utterance_));
return;
}
current_utterance_->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
kInvalidLength, GetTtsPlatform()->GetError());
- current_utterance_.reset();
+ SetCurrentUtterance(nullptr);
}
void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
- while (!utterance_deque_.empty()) {
+ while (!utterance_list_.empty()) {
std::unique_ptr<TtsUtterance> utterance =
- std::move(utterance_deque_.front());
- utterance_deque_.pop_front();
+ std::move(utterance_list_.front());
+ utterance_list_.pop_front();
if (send_events) {
utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
kInvalidLength, std::string());
@@ -479,7 +489,7 @@ void TtsControllerImpl::FinishCurrentUtterance() {
if (!current_utterance_->IsFinished())
current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
kInvalidLength, std::string());
- current_utterance_.reset();
+ SetCurrentUtterance(nullptr);
}
}
@@ -489,11 +499,14 @@ void TtsControllerImpl::SpeakNextUtterance() {
// Start speaking the next utterance in the queue. Keep trying in case
// one fails but there are still more in the queue to try.
- while (!utterance_deque_.empty() && !current_utterance_) {
+ while (!utterance_list_.empty() && !current_utterance_) {
std::unique_ptr<TtsUtterance> utterance =
- std::move(utterance_deque_.front());
- utterance_deque_.pop_front();
- SpeakNow(std::move(utterance));
+ std::move(utterance_list_.front());
+ utterance_list_.pop_front();
+ if (ShouldSpeakUtterance(utterance.get()))
+ SpeakNow(std::move(utterance));
+ else
+ utterance->Finish();
}
}
@@ -502,8 +515,9 @@ void TtsControllerImpl::UpdateUtteranceDefaults(TtsUtterance* utterance) {
double pitch = utterance->GetContinuousParameters().pitch;
double volume = utterance->GetContinuousParameters().volume;
#if defined(OS_CHROMEOS)
- GetTtsControllerDelegate()->UpdateUtteranceDefaultsFromPrefs(utterance, &rate,
- &pitch, &volume);
+ if (GetTtsControllerDelegate())
+ GetTtsControllerDelegate()->UpdateUtteranceDefaultsFromPrefs(
+ utterance, &rate, &pitch, &volume);
#else
// Update pitch, rate and volume to defaults if not explicitly set on
// this utterance.
@@ -517,14 +531,8 @@ void TtsControllerImpl::UpdateUtteranceDefaults(TtsUtterance* utterance) {
utterance->SetContinuousParameters(rate, pitch, volume);
}
-TtsControllerDelegate* TtsControllerImpl::GetTtsControllerDelegate() {
- if (delegate_)
- return delegate_;
- if (GetContentClient() && GetContentClient()->browser()) {
- delegate_ = GetContentClient()->browser()->GetTtsControllerDelegate();
- return delegate_;
- }
- return nullptr;
+void TtsControllerImpl::SetStopSpeakingWhenHidden(bool value) {  // Stores the flag read by ShouldSpeakUtterance() and OnVisibilityChanged().
+ stop_speaking_when_hidden_ = value;
}
void TtsControllerImpl::StripSSML(
@@ -595,4 +603,175 @@ void TtsControllerImpl::PopulateParsedText(std::string* parsed_text,
}
}
+int TtsControllerImpl::GetMatchingVoice(TtsUtterance* utterance,
+ const std::vector<VoiceData>& voices) {  // Scores each entry of |voices| against |utterance| and returns the index of the best one.
+ const std::string app_lang =
+ GetContentClient()->browser()->GetApplicationLocale();  // NOTE(review): no null check here, unlike GetTtsControllerDelegate() - confirm a content client always exists.
+ // Start with a best score of -1, so that any voice that passes the engine-id
+ // and voice-name filters below is returned even if it scores 0.
+ int best_score = -1;
+ int best_score_index = -1;
+#if defined(OS_CHROMEOS)
+ TtsControllerDelegate* delegate = GetTtsControllerDelegate();
+ std::unique_ptr<TtsControllerDelegate::PreferredVoiceIds> preferred_ids =
+ delegate ? delegate->GetPreferredVoiceIdsForUtterance(utterance)
+ : nullptr;
+#endif // defined(OS_CHROMEOS)
+ for (size_t i = 0; i < voices.size(); ++i) {
+ const content::VoiceData& voice = voices[i];
+ int score = 0;  // Each weight below exceeds the sum of all lower-priority weights, so a higher criterion always wins.
+
+ // If the extension ID is specified, check for an exact match.
+ if (!utterance->GetEngineId().empty() &&
+ utterance->GetEngineId() != voice.engine_id)
+ continue;  // Hard filter: this voice is not considered at all.
+
+ // If the voice name is specified, check for an exact match.
+ if (!utterance->GetVoiceName().empty() &&
+ voice.name != utterance->GetVoiceName())
+ continue;  // Hard filter: this voice is not considered at all.
+
+ // Prefer the utterance language.
+ if (!voice.lang.empty() && !utterance->GetLang().empty()) {
+ // An exact language match is worth more than a partial match.
+ if (voice.lang == utterance->GetLang()) {
+ score += 128;
+ } else if (l10n_util::GetLanguage(voice.lang) ==
+ l10n_util::GetLanguage(utterance->GetLang())) {
+ score += 64;
+ }
+ }
+
+ // Next, prefer required event types.
+ if (!utterance->GetRequiredEventTypes().empty()) {
+ bool has_all_required_event_types = true;
+ for (TtsEventType event_type : utterance->GetRequiredEventTypes()) {
+ if (voice.events.find(event_type) == voice.events.end()) {
+ has_all_required_event_types = false;
+ break;
+ }
+ }
+ if (has_all_required_event_types)
+ score += 32;
+ }
+
+#if defined(OS_CHROMEOS)
+ if (preferred_ids) {
+ // First prefer the user's preference voice for the utterance language,
+ // if the utterance language is specified.
+ if (!utterance->GetLang().empty() &&
+ VoiceIdMatches(preferred_ids->lang_voice_id, voice)) {
+ score += 16;
+ }
+
+ // Then prefer the user's preference voice for the system language.
+ // This is a lower priority match than the utterance voice.
+ if (VoiceIdMatches(preferred_ids->locale_voice_id, voice))
+ score += 8;
+
+ // Finally, prefer the user's preference voice for any language. This will
+ // pick the default voice if there is no better match for the current
+ // system language and utterance language.
+ if (VoiceIdMatches(preferred_ids->any_locale_voice_id, voice))
+ score += 4;
+ }
+#endif // defined(OS_CHROMEOS)
+
+ // Finally, prefer system language.
+ if (!voice.lang.empty()) {
+ if (voice.lang == app_lang) {
+ score += 2;
+ } else if (l10n_util::GetLanguage(voice.lang) ==
+ l10n_util::GetLanguage(app_lang)) {
+ score += 1;
+ }
+ }
+
+ if (score > best_score) {  // Strict '>' keeps the earliest voice when scores tie.
+ best_score = score;
+ best_score_index = i;  // Implicit size_t -> int conversion.
+ }
+ }
+
+ return best_score_index;  // Index into |voices|, or -1 if no voice passed the filters (including when |voices| is empty).
+}
+
+void TtsControllerImpl::SetCurrentUtterance(
+ std::unique_ptr<TtsUtterance> utterance) {  // Takes ownership of |utterance| (may be null) as the current utterance.
+ current_utterance_ = std::move(utterance);
+ Observe(current_utterance_  // Passes the new utterance's WebContents to Observe(), or nullptr when there is no current utterance.
+ ? AsUtteranceImpl(current_utterance_.get())->web_contents()
+ : nullptr);
+}
+
+void TtsControllerImpl::StopCurrentUtteranceAndRemoveUtterancesMatching(
+ WebContents* wc) {
+ DCHECK(wc);
+ // Finishes and removes every queued utterance whose WebContents is |wc|,
+ // then stops the current utterance and starts the next queued one. Callers
+ // in this file pass the observed web_contents() as |wc|.
+ //
+ // This is called when the WebContents for the current utterance is destroyed
+ // or hidden. In the case where it's destroyed, this is done to avoid
+ // attempting to start an utterance that is very likely to be destroyed right
+ // away, and there are also subtle timing issues if we didn't do this (if a
+ // queued utterance has already received WebContentsDestroyed(), and we start
+ // it, we won't get the corresponding WebContentsDestroyed()).
+ auto eraser = [wc](const std::unique_ptr<TtsUtterance>& utterance) {
+ TtsUtteranceImpl* utterance_impl = AsUtteranceImpl(utterance.get());
+ if (utterance_impl->web_contents() == wc) {
+ utterance_impl->Finish();  // Side effect: the utterance is finished before it is erased from the queue.
+ return true;
+ }
+ return false;
+ };
+ utterance_list_.erase(
+ std::remove_if(utterance_list_.begin(), utterance_list_.end(), eraser),
+ utterance_list_.end());
+ const bool stopped = StopCurrentUtteranceIfMatches(GURL());  // Empty GURL skips the origin check; presumably always returns true - TODO confirm.
+ DCHECK(stopped);
+ SpeakNextUtterance();
+}
+
+bool TtsControllerImpl::ShouldSpeakUtterance(TtsUtterance* utterance) {  // Used by SpeakOrEnqueue() and SpeakNextUtterance(); a false result makes them Finish() the utterance instead.
+ TtsUtteranceImpl* utterance_impl = AsUtteranceImpl(utterance);
+ if (!utterance_impl->was_created_with_web_contents())
+ return true;  // Utterances that report no WebContents at creation are always spoken.
+
+ // If the WebContents that created the utterance has been destroyed, don't
+ // speak it.
+ if (!utterance_impl->web_contents())
+ return false;
+
+ // Allow speaking if either the WebContents is visible, or the WebContents
+ // isn't required to be visible before speaking.
+ return !stop_speaking_when_hidden_ ||
+ utterance_impl->web_contents()->GetVisibility() != Visibility::HIDDEN;  // Any visibility other than HIDDEN allows speaking.
+}
+
+//
+// WebContentsObserver
+//
+
+void TtsControllerImpl::WebContentsDestroyed() {  // WebContentsObserver override for the WebContents set via Observe() in SetCurrentUtterance().
+ StopCurrentUtteranceAndRemoveUtterancesMatching(web_contents());  // Stops the current utterance and removes queued ones for the same WebContents.
+}
+
+void TtsControllerImpl::OnVisibilityChanged(Visibility visibility) {  // WebContentsObserver override; acts only on a change to HIDDEN.
+ if (visibility == Visibility::HIDDEN && stop_speaking_when_hidden_)  // No-op unless SetStopSpeakingWhenHidden(true) was called.
+ StopCurrentUtteranceAndRemoveUtterancesMatching(web_contents());
+}
+
+#if defined(OS_CHROMEOS)
+TtsControllerDelegate* TtsControllerImpl::GetTtsControllerDelegate() {  // Returns the cached delegate, fetching it from the content browser client on first use.
+ if (delegate_)
+ return delegate_;  // Already cached by an earlier call.
+ if (GetContentClient() && GetContentClient()->browser()) {
+ delegate_ = GetContentClient()->browser()->GetTtsControllerDelegate();  // May itself be null; if so the lookup is retried on the next call.
+ return delegate_;
+ }
+ return nullptr;  // No content client or browser client is available.
+}
+#endif // defined(OS_CHROMEOS)
+
} // namespace content
diff --git a/chromium/content/browser/speech/tts_controller_impl.h b/chromium/content/browser/speech/tts_controller_impl.h
index 052a8841be9..638c3691d6d 100644
--- a/chromium/content/browser/speech/tts_controller_impl.h
+++ b/chromium/content/browser/speech/tts_controller_impl.h
@@ -5,9 +5,8 @@
#ifndef CONTENT_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_
#define CONTENT_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_
-#include <deque>
+#include <list>
#include <memory>
-#include <set>
#include <string>
#include <vector>
@@ -22,18 +21,23 @@
#include "build/build_config.h"
#include "content/common/content_export.h"
#include "content/public/browser/tts_controller.h"
-#include "content/public/browser/tts_controller_delegate.h"
#include "content/public/browser/tts_platform.h"
+#include "content/public/browser/web_contents_observer.h"
#include "services/data_decoder/public/cpp/data_decoder.h"
#include "url/gurl.h"
namespace content {
class BrowserContext;
+#if defined(OS_CHROMEOS)
+class TtsControllerDelegate;
+#endif
+
// Singleton class that manages text-to-speech for all TTS engines and
// APIs, maintaining a queue of pending utterances and keeping
// track of all state.
-class CONTENT_EXPORT TtsControllerImpl : public TtsController {
+class CONTENT_EXPORT TtsControllerImpl : public TtsController,
+ public WebContentsObserver {
public:
// Get the single instance of this class.
static TtsControllerImpl* GetInstance();
@@ -58,6 +62,7 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
void RemoveUtteranceEventDelegate(UtteranceEventDelegate* delegate) override;
void SetTtsEngineDelegate(TtsEngineDelegate* delegate) override;
TtsEngineDelegate* GetTtsEngineDelegate() override;
+ void SetStopSpeakingWhenHidden(bool value) override;
// Called directly by ~BrowserContext, because a raw BrowserContext pointer
// is stored in an Utterance.
@@ -77,6 +82,7 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
~TtsControllerImpl() override;
private:
+ friend class TtsControllerTestHelper;
FRIEND_TEST_ALL_PREFIXES(TtsControllerTest, TestTtsControllerShutdown);
FRIEND_TEST_ALL_PREFIXES(TtsControllerTest, TestGetMatchingVoice);
FRIEND_TEST_ALL_PREFIXES(TtsControllerTest,
@@ -92,7 +98,13 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
// |utterance| or delete it if there's an error.
void SpeakNow(std::unique_ptr<TtsUtterance> utterance);
- void StopInternal(const GURL& source_url);
+ // If the current utterance matches |source_url|, it is stopped and the
+ // utterance queue cleared.
+ void StopAndClearQueue(const GURL& source_url);
+
+ // Stops the current utterance if it matches |source_url|. Returns true on
+ // success, false if the current utterance does not match |source_url|.
+ bool StopCurrentUtteranceIfMatches(const GURL& source_url);
// Clear the utterance queue. If send_events is true, will send
// TTS_EVENT_CANCELLED events on each one.
@@ -120,9 +132,31 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
static void PopulateParsedText(std::string* parsed_text,
const base::Value* element);
+ int GetMatchingVoice(TtsUtterance* utterance,
+ const std::vector<VoiceData>& voices);
+
+ // Called internally to set |current_utterance_|.
+ void SetCurrentUtterance(std::unique_ptr<TtsUtterance> utterance);
+
+ // Used when the WebContents of the current utterance is destroyed/hidden.
+ void StopCurrentUtteranceAndRemoveUtterancesMatching(WebContents* wc);
+
+ // Returns true if the utterance should be spoken.
+ bool ShouldSpeakUtterance(TtsUtterance* utterance);
+
+ // WebContentsObserver methods
+ void WebContentsDestroyed() override;
+ void OnVisibilityChanged(Visibility visibility) override;
+
+#if defined(OS_CHROMEOS)
TtsControllerDelegate* GetTtsControllerDelegate();
- TtsControllerDelegate* delegate_;
+ TtsControllerDelegate* delegate_ = nullptr;
+#endif
+
+ TtsEngineDelegate* engine_delegate_ = nullptr;
+
+ bool stop_speaking_when_hidden_ = false;
// A set of delegates that want to be notified when the voices change.
base::ObserverList<VoicesChangedDelegate> voices_changed_delegates_;
@@ -131,14 +165,14 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController {
std::unique_ptr<TtsUtterance> current_utterance_;
// Whether the queue is paused or not.
- bool paused_;
+ bool paused_ = false;
// A pointer to the platform implementation of text-to-speech, for
// dependency injection.
- TtsPlatform* tts_platform_;
+ TtsPlatform* tts_platform_ = nullptr;
// A queue of utterances to speak after the current one finishes.
- std::deque<std::unique_ptr<TtsUtterance>> utterance_deque_;
+ std::list<std::unique_ptr<TtsUtterance>> utterance_list_;
DISALLOW_COPY_AND_ASSIGN(TtsControllerImpl);
};
diff --git a/chromium/content/browser/speech/tts_controller_unittest.cc b/chromium/content/browser/speech/tts_controller_unittest.cc
index 2282f8db739..593814ad203 100644
--- a/chromium/content/browser/speech/tts_controller_unittest.cc
+++ b/chromium/content/browser/speech/tts_controller_unittest.cc
@@ -4,25 +4,39 @@
// Unit tests for the TTS Controller.
+#include "content/browser/speech/tts_controller_impl.h"
+
#include "base/memory/ptr_util.h"
#include "base/values.h"
-#include "content/browser/speech/tts_controller_impl.h"
-#include "content/public/browser/tts_controller_delegate.h"
+#include "content/browser/speech/tts_utterance_impl.h"
#include "content/public/browser/tts_platform.h"
+#include "content/public/browser/visibility.h"
#include "content/public/test/browser_task_environment.h"
#include "content/public/test/test_browser_context.h"
+#include "content/public/test/test_renderer_host.h"
+#include "content/test/test_content_browser_client.h"
+#include "content/test/test_web_contents.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
-namespace content {
+#if defined(OS_CHROMEOS)
+#include "content/public/browser/tts_controller_delegate.h"
+#endif
-class TtsControllerTest : public testing::Test {};
+namespace content {
// Platform Tts implementation that does nothing.
class MockTtsPlatformImpl : public TtsPlatform {
public:
- MockTtsPlatformImpl() {}
- virtual ~MockTtsPlatformImpl() {}
+ MockTtsPlatformImpl() = default;
+ virtual ~MockTtsPlatformImpl() = default;
+
+ void set_voices(const std::vector<VoiceData>& voices) { voices_ = voices; }
+
+ void set_run_speak_callback(bool value) { run_speak_callback_ = value; }
+ void set_is_speaking(bool value) { is_speaking_ = value; }
+
+ // TtsPlatform:
bool PlatformImplAvailable() override { return true; }
void Speak(int utterance_id,
const std::string& utterance,
@@ -30,13 +44,16 @@ class MockTtsPlatformImpl : public TtsPlatform {
const VoiceData& voice,
const UtteranceContinuousParameters& params,
base::OnceCallback<void(bool)> on_speak_finished) override {
- std::move(on_speak_finished).Run(true);
+ if (run_speak_callback_)
+ std::move(on_speak_finished).Run(true);
}
- bool IsSpeaking() override { return false; }
+ bool IsSpeaking() override { return is_speaking_; }
bool StopSpeaking() override { return true; }
void Pause() override {}
void Resume() override {}
- void GetVoices(std::vector<VoiceData>* out_voices) override {}
+ void GetVoices(std::vector<VoiceData>* out_voices) override {
+ *out_voices = voices_;
+ }
bool LoadBuiltInTtsEngine(BrowserContext* browser_context) override {
return false;
}
@@ -45,12 +62,20 @@ class MockTtsPlatformImpl : public TtsPlatform {
void SetError(const std::string& error) override {}
std::string GetError() override { return std::string(); }
void ClearError() override {}
+
+ private:
+ std::vector<VoiceData> voices_;
+ bool run_speak_callback_ = true;
+ bool is_speaking_ = false;
};
+#if defined(OS_CHROMEOS)
class MockTtsControllerDelegate : public TtsControllerDelegate {
public:
- MockTtsControllerDelegate() {}
- ~MockTtsControllerDelegate() override {}
+ MockTtsControllerDelegate() = default;
+ ~MockTtsControllerDelegate() override = default;
+
+ void SetPreferredVoiceIds(const PreferredVoiceIds& ids) { ids_ = ids; }
BrowserContext* GetLastBrowserContext() {
BrowserContext* result = last_browser_context_;
@@ -58,10 +83,12 @@ class MockTtsControllerDelegate : public TtsControllerDelegate {
return result;
}
- int GetMatchingVoice(content::TtsUtterance* utterance,
- std::vector<content::VoiceData>& voices) override {
+ // TtsControllerDelegate:
+ std::unique_ptr<PreferredVoiceIds> GetPreferredVoiceIdsForUtterance(
+ TtsUtterance* utterance) override {
last_browser_context_ = utterance->GetBrowserContext();
- return -1;
+ auto ids = std::make_unique<PreferredVoiceIds>(ids_);
+ return ids;
}
void UpdateUtteranceDefaultsFromPrefs(content::TtsUtterance* utterance,
@@ -69,15 +96,11 @@ class MockTtsControllerDelegate : public TtsControllerDelegate {
double* pitch,
double* volume) override {}
- void SetTtsEngineDelegate(content::TtsEngineDelegate* delegate) override {}
-
- content::TtsEngineDelegate* GetTtsEngineDelegate() override {
- return nullptr;
- }
-
private:
BrowserContext* last_browser_context_ = nullptr;
+ PreferredVoiceIds ids_;
};
+#endif
// Subclass of TtsControllerImpl with a public ctor and dtor.
class TtsControllerForTesting : public TtsControllerImpl {
@@ -86,11 +109,14 @@ class TtsControllerForTesting : public TtsControllerImpl {
~TtsControllerForTesting() override {}
};
-TEST_F(TtsControllerTest, TestTtsControllerShutdown) {
+TEST(TtsControllerTest, TestTtsControllerShutdown) {
MockTtsPlatformImpl platform_impl;
- TtsControllerForTesting* controller = new TtsControllerForTesting();
- MockTtsControllerDelegate* delegate = new MockTtsControllerDelegate();
- controller->delegate_ = delegate;
+ std::unique_ptr<TtsControllerForTesting> controller =
+ std::make_unique<TtsControllerForTesting>();
+#if defined(OS_CHROMEOS)
+ MockTtsControllerDelegate delegate;
+ controller->delegate_ = &delegate;
+#endif
controller->SetTtsPlatform(&platform_impl);
@@ -106,13 +132,11 @@ TEST_F(TtsControllerTest, TestTtsControllerShutdown) {
// Make sure that deleting the controller when there are pending
// utterances doesn't cause a crash.
- delete controller;
-
- // Clean up.
- delete delegate;
+ controller.reset();
}
-TEST_F(TtsControllerTest, TestBrowserContextRemoved) {
+#if defined(OS_CHROMEOS)
+TEST(TtsControllerTest, TestBrowserContextRemoved) {
// Create a controller, mock other stuff, and create a test
// browser context.
TtsControllerImpl* controller = TtsControllerImpl::GetInstance();
@@ -123,9 +147,17 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) {
content::BrowserTaskEnvironment task_environment;
auto browser_context = std::make_unique<TestBrowserContext>();
+ std::vector<VoiceData> voices;
+ VoiceData voice_data;
+ voice_data.engine_id = "x";
+ voice_data.events.insert(TTS_EVENT_END);
+ voices.push_back(voice_data);
+ platform_impl.set_voices(voices);
+
// Speak an utterance associated with this test browser context.
std::unique_ptr<TtsUtterance> utterance1 =
TtsUtterance::Create(browser_context.get());
+ utterance1->SetEngineId("x");
utterance1->SetCanEnqueue(true);
utterance1->SetSrcId(1);
controller->SpeakOrEnqueue(std::move(utterance1));
@@ -137,6 +169,7 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) {
// this browser context.
std::unique_ptr<TtsUtterance> utterance2 =
TtsUtterance::Create(browser_context.get());
+ utterance2->SetEngineId("x");
utterance2->SetCanEnqueue(true);
utterance2->SetSrcId(2);
controller->SpeakOrEnqueue(std::move(utterance2));
@@ -150,9 +183,8 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) {
controller->SpeakNextUtterance();
ASSERT_EQ(nullptr, delegate.GetLastBrowserContext());
}
-
-#if !defined(OS_CHROMEOS)
-TEST_F(TtsControllerTest, TestTtsControllerUtteranceDefaults) {
+#else
+TEST(TtsControllerTest, TestTtsControllerUtteranceDefaults) {
std::unique_ptr<TtsControllerForTesting> controller =
std::make_unique<TtsControllerForTesting>();
@@ -175,6 +207,337 @@ TEST_F(TtsControllerTest, TestTtsControllerUtteranceDefaults) {
EXPECT_EQ(blink::mojom::kSpeechSynthesisDefaultVolume,
utterance1->GetContinuousParameters().volume);
}
-#endif // !defined(OS_CHROMEOS)
+#endif
+
+TEST(TtsControllerTest, TestGetMatchingVoice) {
+ std::unique_ptr<TtsControllerForTesting> controller =
+ std::make_unique<TtsControllerForTesting>();
+#if defined(OS_CHROMEOS)
+ MockTtsControllerDelegate delegate;
+ controller->delegate_ = &delegate;
+#endif
+
+ TestContentBrowserClient::GetInstance()->set_application_locale("en");
+
+ {
+ // Calling GetMatchingVoice with no voices returns -1.
+ std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+ std::vector<VoiceData> voices;
+ EXPECT_EQ(-1, controller->GetMatchingVoice(utterance.get(), voices));
+ }
+
+ {
+ // Calling GetMatchingVoice with any voices returns the first one
+ // even if there are no criteria that match.
+ std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+ std::vector<VoiceData> voices(2);
+ EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices));
+ }
+
+ {
+ // If nothing else matches, the English voice is returned.
+ // (In tests the language will always be English.)
+ std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+ std::vector<VoiceData> voices;
+ VoiceData fr_voice;
+ fr_voice.lang = "fr";
+ voices.push_back(fr_voice);
+ VoiceData en_voice;
+ en_voice.lang = "en";
+ voices.push_back(en_voice);
+ VoiceData de_voice;
+ de_voice.lang = "de";
+ voices.push_back(de_voice);
+ EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices));
+ }
+
+ {
+ // Check precedence of various matching criteria.
+ std::vector<VoiceData> voices;
+ VoiceData voice0;
+ voices.push_back(voice0);
+ VoiceData voice1;
+ voice1.events.insert(TTS_EVENT_WORD);
+ voices.push_back(voice1);
+ VoiceData voice2;
+ voice2.lang = "de-DE";
+ voices.push_back(voice2);
+ VoiceData voice3;
+ voice3.lang = "fr-CA";
+ voices.push_back(voice3);
+ VoiceData voice4;
+ voice4.name = "Voice4";
+ voices.push_back(voice4);
+ VoiceData voice5;
+ voice5.engine_id = "id5";
+ voices.push_back(voice5);
+ VoiceData voice6;
+ voice6.engine_id = "id7";
+ voice6.name = "Voice6";
+ voice6.lang = "es-es";
+ voices.push_back(voice6);
+ VoiceData voice7;
+ voice7.engine_id = "id7";
+ voice7.name = "Voice7";
+ voice7.lang = "es-mx";
+ voices.push_back(voice7);
+ VoiceData voice8;
+ voice8.engine_id = "";
+ voice8.name = "Android";
+ voice8.lang = "";
+ voice8.native = true;
+ voices.push_back(voice8);
+
+ std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+ EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices));
+
+ std::set<TtsEventType> types;
+ types.insert(TTS_EVENT_WORD);
+ utterance->SetRequiredEventTypes(types);
+ EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices));
+
+ utterance->SetLang("de-DE");
+ EXPECT_EQ(2, controller->GetMatchingVoice(utterance.get(), voices));
+
+ utterance->SetLang("fr-FR");
+ EXPECT_EQ(3, controller->GetMatchingVoice(utterance.get(), voices));
+
+ utterance->SetVoiceName("Voice4");
+ EXPECT_EQ(4, controller->GetMatchingVoice(utterance.get(), voices));
+
+ utterance->SetVoiceName("");
+ utterance->SetEngineId("id5");
+ EXPECT_EQ(5, controller->GetMatchingVoice(utterance.get(), voices));
+
+#if defined(OS_CHROMEOS)
+ TtsControllerDelegate::PreferredVoiceIds preferred_voice_ids;
+ preferred_voice_ids.locale_voice_id.emplace("Voice7", "id7");
+ preferred_voice_ids.any_locale_voice_id.emplace("Android", "");
+ delegate.SetPreferredVoiceIds(preferred_voice_ids);
+
+ // Voice6 is matched when the utterance locale exactly matches its locale.
+ utterance->SetEngineId("");
+ utterance->SetLang("es-es");
+ EXPECT_EQ(6, controller->GetMatchingVoice(utterance.get(), voices));
+
+ // The 7th voice is the default for "es", even though the utterance is
+ // "es-ar". |voice6| is not matched because it is not the default.
+ utterance->SetEngineId("");
+ utterance->SetLang("es-ar");
+ EXPECT_EQ(7, controller->GetMatchingVoice(utterance.get(), voices));
+
+ // The 8th voice is like the built-in "Android" voice: it has no lang
+ // and no engine ID. Make sure it can still be matched.
+ preferred_voice_ids.locale_voice_id.reset();
+ delegate.SetPreferredVoiceIds(preferred_voice_ids);
+ utterance->SetVoiceName("Android");
+ utterance->SetEngineId("");
+ utterance->SetLang("");
+ EXPECT_EQ(8, controller->GetMatchingVoice(utterance.get(), voices));
+
+ delegate.SetPreferredVoiceIds({});
+#endif
+ }
+
+ {
+ // Check voices against system language.
+ std::vector<VoiceData> voices;
+ VoiceData voice0;
+ voice0.engine_id = "id0";
+ voice0.name = "voice0";
+ voice0.lang = "en-GB";
+ voices.push_back(voice0);
+ VoiceData voice1;
+ voice1.engine_id = "id1";
+ voice1.name = "voice1";
+ voice1.lang = "en-US";
+ voices.push_back(voice1);
+ std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr));
+
+ // voice1 is matched against the exact default system language.
+ TestContentBrowserClient::GetInstance()->set_application_locale("en-US");
+ utterance->SetLang("");
+ EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices));
+
+#if defined(OS_CHROMEOS)
+ // voice0 is matched against the system language which has no region piece.
+ TestContentBrowserClient::GetInstance()->set_application_locale("en");
+ EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices));
+
+ TtsControllerDelegate::PreferredVoiceIds preferred_voice_ids2;
+ preferred_voice_ids2.locale_voice_id.emplace("voice0", "id0");
+ delegate.SetPreferredVoiceIds(preferred_voice_ids2);
+ // voice0 is matched against the pref over the system language.
+ TestContentBrowserClient::GetInstance()->set_application_locale("en-US");
+ EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices));
+#endif
+ }
+}
+
+class TtsControllerTestHelper {
+ public:
+ TtsControllerTestHelper() {
+ controller_.SetTtsPlatform(&platform_impl_);
+ // This ensures utterances don't immediately complete.
+ platform_impl_.set_run_speak_callback(false);
+ platform_impl_.set_is_speaking(true);
+ }
+
+ std::unique_ptr<TestWebContents> CreateWebContents() {
+ return std::unique_ptr<TestWebContents>(
+ TestWebContents::Create(&browser_context_, nullptr));
+ }
+
+ std::unique_ptr<TtsUtteranceImpl> CreateUtterance(WebContents* web_contents) {
+ return std::make_unique<TtsUtteranceImpl>(&browser_context_, web_contents);
+ }
+
+ MockTtsPlatformImpl* platform_impl() { return &platform_impl_; }
+
+ TtsControllerForTesting* controller() { return &controller_; }
+
+ TtsUtterance* TtsControllerCurrentUtterance() {
+ return controller_.current_utterance_.get();
+ }
+
+ bool IsUtteranceListEmpty() { return controller_.utterance_list_.empty(); }
+
+ private:
+ content::BrowserTaskEnvironment task_environment_;
+ RenderViewHostTestEnabler rvh_enabler_;
+ TestBrowserContext browser_context_;
+ MockTtsPlatformImpl platform_impl_;
+ TtsControllerForTesting controller_;
+};
+
+TEST(TtsControllerTest, StopsWhenWebContentsDestroyed) {
+ TtsControllerTestHelper helper;
+ std::unique_ptr<WebContents> web_contents = helper.CreateWebContents();
+ std::unique_ptr<TtsUtteranceImpl> utterance =
+ helper.CreateUtterance(web_contents.get());
+
+ helper.controller()->SpeakOrEnqueue(std::move(utterance));
+ EXPECT_TRUE(helper.controller()->IsSpeaking());
+ EXPECT_TRUE(helper.TtsControllerCurrentUtterance());
+
+ web_contents.reset();
+ // Destroying the WebContents should reset
+ // |TtsController::current_utterance_|.
+ EXPECT_FALSE(helper.TtsControllerCurrentUtterance());
+}
+
+TEST(TtsControllerTest, StartsQueuedUtteranceWhenWebContentsDestroyed) {
+ TtsControllerTestHelper helper;
+ std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents();
+ std::unique_ptr<WebContents> web_contents2 = helper.CreateWebContents();
+ std::unique_ptr<TtsUtteranceImpl> utterance1 =
+ helper.CreateUtterance(web_contents1.get());
+ void* raw_utterance1 = utterance1.get();
+ std::unique_ptr<TtsUtteranceImpl> utterance2 =
+ helper.CreateUtterance(web_contents2.get());
+ utterance2->SetCanEnqueue(true);
+ void* raw_utterance2 = utterance2.get();
+
+ helper.controller()->SpeakOrEnqueue(std::move(utterance1));
+ EXPECT_TRUE(helper.controller()->IsSpeaking());
+ EXPECT_TRUE(helper.TtsControllerCurrentUtterance());
+ helper.controller()->SpeakOrEnqueue(std::move(utterance2));
+ EXPECT_EQ(raw_utterance1, helper.TtsControllerCurrentUtterance());
+
+ web_contents1.reset();
+ // Destroying |web_contents1| should delete |utterance1| and start
+ // |utterance2|.
+ EXPECT_TRUE(helper.TtsControllerCurrentUtterance());
+ EXPECT_EQ(raw_utterance2, helper.TtsControllerCurrentUtterance());
+}
+
+TEST(TtsControllerTest, StartsQueuedUtteranceWhenWebContentsDestroyed2) {
+ TtsControllerTestHelper helper;
+ std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents();
+ std::unique_ptr<WebContents> web_contents2 = helper.CreateWebContents();
+ std::unique_ptr<TtsUtteranceImpl> utterance1 =
+ helper.CreateUtterance(web_contents1.get());
+ void* raw_utterance1 = utterance1.get();
+ std::unique_ptr<TtsUtteranceImpl> utterance2 =
+ helper.CreateUtterance(web_contents1.get());
+ std::unique_ptr<TtsUtteranceImpl> utterance3 =
+ helper.CreateUtterance(web_contents2.get());
+ void* raw_utterance3 = utterance3.get();
+ utterance2->SetCanEnqueue(true);
+ utterance3->SetCanEnqueue(true);
+
+ helper.controller()->SpeakOrEnqueue(std::move(utterance1));
+ helper.controller()->SpeakOrEnqueue(std::move(utterance2));
+ helper.controller()->SpeakOrEnqueue(std::move(utterance3));
+ EXPECT_TRUE(helper.controller()->IsSpeaking());
+ EXPECT_EQ(raw_utterance1, helper.TtsControllerCurrentUtterance());
+
+ web_contents1.reset();
+ // Deleting |web_contents1| should delete |utterance1| and |utterance2| as
+ // they are both from |web_contents1|. |raw_utterance3| should be made the
+ // current as it's from a different WebContents.
+ EXPECT_EQ(raw_utterance3, helper.TtsControllerCurrentUtterance());
+ EXPECT_TRUE(helper.IsUtteranceListEmpty());
+
+ web_contents2.reset();
+ // Deleting |web_contents2| should delete |utterance3|, as it was created
+ // from |web_contents2|.
+ EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance());
+}
+
+TEST(TtsControllerTest, StartsUtteranceWhenWebContentsHidden) {
+ TtsControllerTestHelper helper;
+ std::unique_ptr<TestWebContents> web_contents = helper.CreateWebContents();
+ web_contents->SetVisibilityAndNotifyObservers(Visibility::HIDDEN);
+ std::unique_ptr<TtsUtteranceImpl> utterance =
+ helper.CreateUtterance(web_contents.get());
+ helper.controller()->SpeakOrEnqueue(std::move(utterance));
+ EXPECT_TRUE(helper.controller()->IsSpeaking());
+}
+
+TEST(TtsControllerTest,
+ DoesNotStartUtteranceWhenWebContentsHiddenAndStopSpeakingWhenHiddenSet) {
+ TtsControllerTestHelper helper;
+ std::unique_ptr<TestWebContents> web_contents = helper.CreateWebContents();
+ web_contents->SetVisibilityAndNotifyObservers(Visibility::HIDDEN);
+ std::unique_ptr<TtsUtteranceImpl> utterance =
+ helper.CreateUtterance(web_contents.get());
+ helper.controller()->SetStopSpeakingWhenHidden(true);
+ helper.controller()->SpeakOrEnqueue(std::move(utterance));
+ EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance());
+ EXPECT_TRUE(helper.IsUtteranceListEmpty());
+}
+
+TEST(TtsControllerTest, SkipsQueuedUtteranceFromHiddenWebContents) {
+ TtsControllerTestHelper helper;
+ helper.controller()->SetStopSpeakingWhenHidden(true);
+ std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents();
+ std::unique_ptr<TestWebContents> web_contents2 = helper.CreateWebContents();
+ std::unique_ptr<TtsUtteranceImpl> utterance1 =
+ helper.CreateUtterance(web_contents1.get());
+ const int utterance1_id = utterance1->GetId();
+ std::unique_ptr<TtsUtteranceImpl> utterance2 =
+ helper.CreateUtterance(web_contents2.get());
+ utterance2->SetCanEnqueue(true);
+
+ helper.controller()->SpeakOrEnqueue(std::move(utterance1));
+ EXPECT_TRUE(helper.TtsControllerCurrentUtterance());
+ EXPECT_TRUE(helper.IsUtteranceListEmpty());
+
+ // Speak |utterance2|, which should get queued.
+ helper.controller()->SpeakOrEnqueue(std::move(utterance2));
+ EXPECT_FALSE(helper.IsUtteranceListEmpty());
+
+ // Make the second WebContents hidden; this shouldn't change anything in
+ // TtsController.
+ web_contents2->SetVisibilityAndNotifyObservers(Visibility::HIDDEN);
+ EXPECT_FALSE(helper.IsUtteranceListEmpty());
+
+ // Finish |utterance1|, which should skip |utterance2| because |web_contents2|
+ // is hidden.
+ helper.controller()->OnTtsEvent(utterance1_id, TTS_EVENT_END, 0, 0, {});
+ EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance());
+ EXPECT_TRUE(helper.IsUtteranceListEmpty());
+}
} // namespace content
diff --git a/chromium/content/browser/speech/tts_linux.cc b/chromium/content/browser/speech/tts_linux.cc
index 2c6d2d03133..afd36df9ed3 100644
--- a/chromium/content/browser/speech/tts_linux.cc
+++ b/chromium/content/browser/speech/tts_linux.cc
@@ -14,7 +14,6 @@
#include "base/macros.h"
#include "base/memory/singleton.h"
#include "base/synchronization/lock.h"
-#include "base/task/post_task.h"
#include "base/task/thread_pool.h"
#include "content/browser/speech/tts_platform_impl.h"
#include "content/public/browser/browser_task_traits.h"
@@ -128,7 +127,7 @@ void TtsPlatformImplLinux::Initialize() {
// spd_open has memory leaks which are hard to suppress.
// http://crbug.com/317360
ANNOTATE_SCOPED_MEMORY_LEAK;
- conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", NULL,
+ conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr,
SPD_MODE_THREADED);
}
if (!conn_)
@@ -151,7 +150,7 @@ TtsPlatformImplLinux::~TtsPlatformImplLinux() {
base::AutoLock lock(initialization_lock_);
if (conn_) {
libspeechd_loader_.spd_close(conn_);
- conn_ = NULL;
+ conn_ = nullptr;
}
}
@@ -159,14 +158,14 @@ void TtsPlatformImplLinux::Reset() {
base::AutoLock lock(initialization_lock_);
if (conn_)
libspeechd_loader_.spd_close(conn_);
- conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", NULL,
+ conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr,
SPD_MODE_THREADED);
}
bool TtsPlatformImplLinux::PlatformImplAvailable() {
if (!initialization_lock_.Try())
return false;
- bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
+ bool result = libspeechd_loader_.loaded() && (conn_ != nullptr);
initialization_lock_.Release();
return result;
}
@@ -345,8 +344,8 @@ void TtsPlatformImplLinux::NotificationCallback(size_t msg_id,
// be in a separate thread.
if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
current_notification_ = type;
- base::PostTask(
- FROM_HERE, {BrowserThread::UI},
+ GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(&TtsPlatformImplLinux::OnSpeechEvent,
base::Unretained(TtsPlatformImplLinux::GetInstance()),
type));
@@ -365,8 +364,8 @@ void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
// be in a separate thread.
if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
current_notification_ = state;
- base::PostTask(
- FROM_HERE, {BrowserThread::UI},
+ GetUIThreadTaskRunner({})->PostTask(
+ FROM_HERE,
base::BindOnce(&TtsPlatformImplLinux::OnSpeechEvent,
base::Unretained(TtsPlatformImplLinux::GetInstance()),
state));
diff --git a/chromium/content/browser/speech/tts_utterance_impl.cc b/chromium/content/browser/speech/tts_utterance_impl.cc
index 34ff42b55c5..aa6759c3973 100644
--- a/chromium/content/browser/speech/tts_utterance_impl.cc
+++ b/chromium/content/browser/speech/tts_utterance_impl.cc
@@ -3,6 +3,7 @@
// found in the LICENSE file.
#include "content/browser/speech/tts_utterance_impl.h"
+
#include "base/values.h"
#include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
@@ -37,11 +38,14 @@ int TtsUtteranceImpl::next_utterance_id_ = 0;
std::unique_ptr<TtsUtterance> TtsUtterance::Create(
BrowserContext* browser_context) {
- return std::make_unique<TtsUtteranceImpl>(browser_context);
+ return std::make_unique<TtsUtteranceImpl>(browser_context, nullptr);
}
-TtsUtteranceImpl::TtsUtteranceImpl(BrowserContext* browser_context)
- : browser_context_(browser_context),
+TtsUtteranceImpl::TtsUtteranceImpl(BrowserContext* browser_context,
+ WebContents* web_contents)
+ : WebContentsObserver(web_contents),
+ browser_context_(browser_context),
+ was_created_with_web_contents_(web_contents != nullptr),
id_(next_utterance_id_++),
src_id_(-1),
can_enqueue_(false),
diff --git a/chromium/content/browser/speech/tts_utterance_impl.h b/chromium/content/browser/speech/tts_utterance_impl.h
index fc73f7c7570..2b54961acbd 100644
--- a/chromium/content/browser/speech/tts_utterance_impl.h
+++ b/chromium/content/browser/speech/tts_utterance_impl.h
@@ -5,22 +5,32 @@
#ifndef CONTENT_BROWSER_SPEECH_TTS_UTTERANCE_IMPL_H_
#define CONTENT_BROWSER_SPEECH_TTS_UTTERANCE_IMPL_H_
+#include <memory>
#include <set>
#include <string>
-#include "base/values.h"
-#include "content/public/browser/tts_controller.h"
#include "content/public/browser/tts_utterance.h"
+#include "content/public/browser/web_contents_observer.h"
+
+namespace base {
+class Value;
+}
namespace content {
class BrowserContext;
+class WebContents;
// Implementation of TtsUtterance.
-class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance {
+class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance,
+ public WebContentsObserver {
public:
- TtsUtteranceImpl(BrowserContext* browser_context);
+ TtsUtteranceImpl(BrowserContext* browser_context, WebContents* web_contents);
~TtsUtteranceImpl() override;
+ bool was_created_with_web_contents() const {
+ return was_created_with_web_contents_;
+ }
+
// TtsUtterance overrides.
void OnTtsEvent(TtsEventType event_type,
int char_index,
@@ -77,6 +87,9 @@ class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance {
// The BrowserContext that initiated this utterance.
BrowserContext* browser_context_;
+ // True if the constructor was supplied with a WebContents.
+ const bool was_created_with_web_contents_;
+
// The content embedder engine ID of the engine providing TTS for this
// utterance, or empty if native TTS is being used.
std::string engine_id_;