diff options
Diffstat (limited to 'chromium/content/browser/speech')
21 files changed, 854 insertions, 519 deletions
diff --git a/chromium/content/browser/speech/DEPS b/chromium/content/browser/speech/DEPS index d3ee893ebac..7c726080d23 100644 --- a/chromium/content/browser/speech/DEPS +++ b/chromium/content/browser/speech/DEPS @@ -1,3 +1,11 @@ include_rules = [ + "+components/speech", "+google_apis", # Exception to general rule, see content/DEPS for details. ] + +specific_include_rules = { + "tts_controller_impl\.cc": [ + # TtsControllerImpl uses GetLanguage(), which is not grd related. + "+ui/base/l10n/l10n_util.h", + ], +} diff --git a/chromium/content/browser/speech/proto/BUILD.gn b/chromium/content/browser/speech/proto/BUILD.gn deleted file mode 100644 index dafd61f78ee..00000000000 --- a/chromium/content/browser/speech/proto/BUILD.gn +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright 2014 The Chromium Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -import("//third_party/protobuf/proto_library.gni") - -proto_library("proto") { - sources = [ "google_streaming_api.proto" ] -} diff --git a/chromium/content/browser/speech/proto/google_streaming_api.proto b/chromium/content/browser/speech/proto/google_streaming_api.proto deleted file mode 100644 index ce1b8d98a49..00000000000 --- a/chromium/content/browser/speech/proto/google_streaming_api.proto +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2012 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -syntax = "proto2"; -option optimize_for = LITE_RUNTIME; - -// TODO(hans): Commented out due to compilation errors. -// option cc_api_version = 2; - -package content.proto; - -// SpeechRecognitionEvent is the only message type sent to client. -// -// The first SpeechRecognitionEvent is an empty (default) message to indicate -// as early as possible that the stream connection has been established. -message SpeechRecognitionEvent { - enum StatusCode { - // Note: in JavaScript API SpeechRecognitionError 0 is "OTHER" error. - STATUS_SUCCESS = 0; - STATUS_NO_SPEECH = 1; - STATUS_ABORTED = 2; - STATUS_AUDIO_CAPTURE = 3; - STATUS_NETWORK = 4; - STATUS_NOT_ALLOWED = 5; - STATUS_SERVICE_NOT_ALLOWED = 6; - STATUS_BAD_GRAMMAR = 7; - STATUS_LANGUAGE_NOT_SUPPORTED = 8; - } - optional StatusCode status = 1 [default = STATUS_SUCCESS]; - - // May contain zero or one final=true result (the newly settled portion). - // May also contain zero or more final=false results. - // (Note that this differs from JavaScript API resultHistory in that no more - // than one final=true result is returned, so client must accumulate - // resultHistory by concatenating the final=true results.) - repeated SpeechRecognitionResult result = 2; - - enum EndpointerEventType { - START_OF_SPEECH = 0; - END_OF_SPEECH = 1; - END_OF_AUDIO = 2; // End of audio stream has been reached. - // End of utterance indicates that no more speech segments are expected. - END_OF_UTTERANCE = 3; - } - - optional EndpointerEventType endpoint = 4; -}; - -message SpeechRecognitionResult { - repeated SpeechRecognitionAlternative alternative = 1; - - // True if this is the final time the speech service will return this - // particular SpeechRecognitionResult. If false, then this represents an - // interim result that could still be changed. - optional bool final = 2 [default = false]; - - // An estimate of the probability that the recognizer will not change its - // guess about this interim result. Values range from 0.0 (completely - // unstable) to 1.0 (completely stable). Note that this is not the same as - // "confidence", which estimate the probability that a recognition result - // is correct. This field is only provided for interim (final=false) results. - optional float stability = 3; -}; - -// Item in N-best list. -message SpeechRecognitionAlternative { - // Spoken text. - optional string transcript = 1; - - // The confidence estimate between 0.0 and 1.0. A higher number means the - // system is more confident that the recognition is correct. - // This field is typically provided only for the top hypothesis and only for - // final results. - optional float confidence = 2; -} diff --git a/chromium/content/browser/speech/speech_recognition_browsertest.cc b/chromium/content/browser/speech/speech_recognition_browsertest.cc index bb688ed2d2e..cf1dc5cbf2c 100644 --- a/chromium/content/browser/speech/speech_recognition_browsertest.cc +++ b/chromium/content/browser/speech/speech_recognition_browsertest.cc @@ -17,15 +17,14 @@ #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "base/sys_byteorder.h" -#include "base/task/post_task.h" #include "base/threading/thread_task_runner_handle.h" #include "build/build_config.h" -#include "content/browser/speech/proto/google_streaming_api.pb.h" #include "content/browser/speech/speech_recognition_engine.h" #include "content/browser/speech/speech_recognition_manager_impl.h" #include "content/browser/speech/speech_recognizer_impl.h" #include "content/public/browser/browser_task_traits.h" #include "content/public/browser/browser_thread.h" +#include "content/public/browser/google_streaming_api.pb.h" #include "content/public/browser/notification_types.h" #include "content/public/browser/web_contents.h" #include "content/public/test/browser_test.h" @@ -61,8 +60,8 @@ class MockAudioSystem : public media::AudioSystem { // Posting callback to allow current SpeechRecognizerImpl dispatching event // to complete before transitioning to the next FSM state. - base::PostTask( - FROM_HERE, {content::BrowserThread::IO}, + content::GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(std::move(on_params_cb), media::AudioParameters::UnavailableDeviceParams())); } @@ -230,8 +229,8 @@ class SpeechRecognitionBrowserTest : public ContentBrowserTest { // AudioCaptureSourcer::Stop() again. SpeechRecognizerImpl::SetAudioEnvironmentForTesting(nullptr, nullptr); - base::PostTask(FROM_HERE, {content::BrowserThread::UI}, - base::BindOnce(&SpeechRecognitionBrowserTest::SendResponse, + content::GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognitionBrowserTest::SendResponse, base::Unretained(this))); } diff --git a/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc b/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc index 443516d9a70..155fc32d1ca 100644 --- a/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc +++ b/chromium/content/browser/speech/speech_recognition_dispatcher_host.cc @@ -9,7 +9,6 @@ #include "base/bind.h" #include "base/command_line.h" #include "base/lazy_instance.h" -#include "base/task/post_task.h" #include "content/browser/browser_plugin/browser_plugin_guest.h" #include "content/browser/frame_host/frame_tree_node.h" #include "content/browser/frame_host/render_frame_host_manager.h" @@ -62,8 +61,8 @@ void SpeechRecognitionDispatcherHost::Start( blink::mojom::StartSpeechRecognitionRequestParamsPtr params) { DCHECK_CURRENTLY_ON(BrowserThread::IO); - base::PostTask( - FROM_HERE, {BrowserThread::UI}, + GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognitionDispatcherHost::StartRequestOnUI, AsWeakPtr(), render_process_id_, render_frame_id_, std::move(params))); @@ -89,6 +88,14 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI( WebContentsImpl* web_contents = static_cast<WebContentsImpl*>(WebContents::FromRenderFrameHost(rfh)); + // Disable BackForwardCache when using the SpeechRecognition feature, because + // currently we do not handle speech recognition after placing the page in + // BackForwardCache. + // TODO(sreejakshetty): Make SpeechRecognition compatible with + // BackForwardCache. + rfh->OnSchedulerTrackedFeatureUsed( + blink::scheduler::WebSchedulerTrackedFeature::kSpeechRecognizer); + // If the speech API request was from an inner WebContents or a guest, save // the context of the outer WebContents or the embedder since we will use it // to decide permission. @@ -126,8 +133,8 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI( StoragePartition* storage_partition = BrowserContext::GetStoragePartition( browser_context, web_contents->GetSiteInstance()); - base::PostTask( - FROM_HERE, {BrowserThread::IO}, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce( &SpeechRecognitionDispatcherHost::StartSessionOnIO, speech_recognition_dispatcher_host, std::move(params), diff --git a/chromium/content/browser/speech/speech_recognition_engine.cc b/chromium/content/browser/speech/speech_recognition_engine.cc index e55a430e581..c7af8469cee 100644 --- a/chromium/content/browser/speech/speech_recognition_engine.cc +++ b/chromium/content/browser/speech/speech_recognition_engine.cc @@ -15,7 +15,7 @@ #include "base/strings/utf_string_conversions.h" #include "base/time/time.h" #include "content/browser/speech/audio_buffer.h" -#include "content/browser/speech/proto/google_streaming_api.pb.h" +#include "content/public/browser/google_streaming_api.pb.h" #include "google_apis/google_api_keys.h" #include "mojo/public/c/system/types.h" #include "mojo/public/cpp/bindings/receiver_set.h" @@ -23,8 +23,6 @@ #include "net/base/load_flags.h" #include "net/traffic_annotation/network_traffic_annotation.h" #include "services/network/public/cpp/shared_url_loader_factory.h" -#include "services/network/public/cpp/simple_url_loader.h" -#include "services/network/public/mojom/chunked_data_pipe_getter.mojom.h" #include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h" #include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h" @@ -42,9 +40,6 @@ const char* web_service_base_url_for_tests = nullptr; // This matches the maximum maxAlternatives value supported by the server. const uint32_t kMaxMaxAlternatives = 30; -// Maximum amount of data written per Mojo write. -const uint32_t kMaxUploadWrite = 128 * 1024; - // TODO(hans): Remove this and other logging when we don't need it anymore. void DumpResponse(const std::string& response) { DVLOG(1) << "------------"; @@ -81,198 +76,6 @@ const uint32_t kDefaultMaxHypotheses = 1; } // namespace -// Streams sound data up to the server. -class SpeechRecognitionEngine::UpstreamLoader - : public network::mojom::ChunkedDataPipeGetter { - public: - UpstreamLoader(std::unique_ptr<network::ResourceRequest> resource_request, - net::NetworkTrafficAnnotationTag upstream_traffic_annotation, - network::mojom::URLLoaderFactory* url_loader_factory, - SpeechRecognitionEngine* speech_recognition_engine) - : speech_recognition_engine_(speech_recognition_engine) { - // Attach a chunked upload body. - mojo::PendingRemote<network::mojom::ChunkedDataPipeGetter> data_remote; - receiver_set_.Add(this, data_remote.InitWithNewPipeAndPassReceiver()); - resource_request->request_body = new network::ResourceRequestBody(); - resource_request->request_body->SetToChunkedDataPipe( - std::move(data_remote)); - simple_url_loader_ = network::SimpleURLLoader::Create( - std::move(resource_request), upstream_traffic_annotation); - simple_url_loader_->DownloadToStringOfUnboundedSizeUntilCrashAndDie( - url_loader_factory, - base::BindOnce(&UpstreamLoader::OnComplete, base::Unretained(this))); - } - - ~UpstreamLoader() override = default; - - void OnComplete(std::unique_ptr<std::string> response_body) { - int response_code = -1; - if (simple_url_loader_->ResponseInfo() && - simple_url_loader_->ResponseInfo()->headers) { - response_code = - simple_url_loader_->ResponseInfo()->headers->response_code(); - } - speech_recognition_engine_->OnUpstreamDataComplete(response_body != nullptr, - response_code); - } - - void AppendChunkToUpload(const std::string& data, bool is_last_chunk) { - DCHECK(!has_last_chunk_); - - upload_body_ += data; - if (is_last_chunk) { - // Send size before the rest of the body. While it doesn't matter much, if - // the other side receives the size before the last chunk, which Mojo does - // not gaurantee, some protocols can merge the data and the last chunk - // itself into a single frame. - has_last_chunk_ = is_last_chunk; - if (get_size_callback_) - std::move(get_size_callback_).Run(net::OK, upload_body_.size()); - } - - SendData(); - } - - private: - void OnUploadPipeWriteable(MojoResult unused) { SendData(); } - - // Attempts to send more of the upload body, if more data is available, and - // |upload_pipe_| is valid. - void SendData() { - DCHECK_LE(upload_position_, upload_body_.size()); - - if (!upload_pipe_.is_valid()) - return; - - // Nothing more to write yet, or done writing everything. - if (upload_position_ == upload_body_.size()) - return; - - // Since kMaxUploadWrite is a uint32_t, no overflow occurs in this downcast. - uint32_t write_bytes = std::min(upload_body_.length() - upload_position_, - static_cast<size_t>(kMaxUploadWrite)); - MojoResult result = - upload_pipe_->WriteData(upload_body_.data() + upload_position_, - &write_bytes, MOJO_WRITE_DATA_FLAG_NONE); - - // Wait for the pipe to have more capacity available, if needed. - if (result == MOJO_RESULT_SHOULD_WAIT) { - upload_pipe_watcher_->ArmOrNotify(); - return; - } - - // Do nothing on pipe closure - depend on the SimpleURLLoader to notice the - // other pipes being closed on error. Can reach this point if there's a - // retry, for instance, so cannot draw any conclusions here. - if (result != MOJO_RESULT_OK) - return; - - upload_position_ += write_bytes; - // If more data is available, arm the watcher again. Don't write again in a - // loop, even if WriteData would allow it, to avoid blocking the current - // thread. - if (upload_position_ < upload_body_.size()) - upload_pipe_watcher_->ArmOrNotify(); - } - - // mojom::ChunkedDataPipeGetter implementation: - - void GetSize(GetSizeCallback get_size_callback) override { - if (has_last_chunk_) { - std::move(get_size_callback).Run(net::OK, upload_body_.size()); - } else { - get_size_callback_ = std::move(get_size_callback); - } - } - - void StartReading(mojo::ScopedDataPipeProducerHandle pipe) override { - // Delete any existing pipe, if any. - upload_pipe_watcher_.reset(); - upload_pipe_ = std::move(pipe); - upload_pipe_watcher_ = std::make_unique<mojo::SimpleWatcher>( - FROM_HERE, mojo::SimpleWatcher::ArmingPolicy::MANUAL); - upload_pipe_watcher_->Watch( - upload_pipe_.get(), MOJO_HANDLE_SIGNAL_WRITABLE, - base::BindRepeating(&UpstreamLoader::OnUploadPipeWriteable, - base::Unretained(this))); - upload_position_ = 0; - - // Will attempt to start sending the request body, if any data is available. - SendData(); - } - - // Partial upload body. Have to cache the entire thing in memory, in case have - // to replay it. - std::string upload_body_; - // Current position in |upload_body_|. All bytes before this point have been - // written to |upload_pipe_|. - size_t upload_position_ = 0; - // Whether |upload_body_| is complete. - bool has_last_chunk_ = false; - - // Current pipe being used to send the |upload_body_| to the URLLoader. - mojo::ScopedDataPipeProducerHandle upload_pipe_; - // Watches |upload_pipe_| for writeability. - std::unique_ptr<mojo::SimpleWatcher> upload_pipe_watcher_; - - // If non-null, invoked once the size of the upload is known. - network::mojom::ChunkedDataPipeGetter::GetSizeCallback get_size_callback_; - - SpeechRecognitionEngine* const speech_recognition_engine_; - std::unique_ptr<network::SimpleURLLoader> simple_url_loader_; - mojo::ReceiverSet<network::mojom::ChunkedDataPipeGetter> receiver_set_; - - DISALLOW_COPY_AND_ASSIGN(UpstreamLoader); -}; - -// Streams response data from the server to the SpeechRecognitionEngine. -class SpeechRecognitionEngine::DownstreamLoader - : public network::SimpleURLLoaderStreamConsumer { - public: - DownstreamLoader(std::unique_ptr<network::ResourceRequest> resource_request, - net::NetworkTrafficAnnotationTag upstream_traffic_annotation, - network::mojom::URLLoaderFactory* url_loader_factory, - SpeechRecognitionEngine* speech_recognition_engine) - : speech_recognition_engine_(speech_recognition_engine) { - simple_url_loader_ = network::SimpleURLLoader::Create( - std::move(resource_request), upstream_traffic_annotation); - simple_url_loader_->DownloadAsStream(url_loader_factory, this); - } - - ~DownstreamLoader() override = default; - - // SimpleURLLoaderStreamConsumer implementation: - - void OnDataReceived(base::StringPiece string_piece, - base::OnceClosure resume) override { - speech_recognition_engine_->OnDownstreamDataReceived(string_piece); - std::move(resume).Run(); - } - - void OnComplete(bool success) override { - int response_code = -1; - if (simple_url_loader_->ResponseInfo() && - simple_url_loader_->ResponseInfo()->headers) { - response_code = - simple_url_loader_->ResponseInfo()->headers->response_code(); - } - - speech_recognition_engine_->OnDownstreamDataComplete(success, - response_code); - } - - void OnRetry(base::OnceClosure start_retry) override { - // Retries are not enabled for these requests. - NOTREACHED(); - } - - private: - SpeechRecognitionEngine* const speech_recognition_engine_; - std::unique_ptr<network::SimpleURLLoader> simple_url_loader_; - - DISALLOW_COPY_AND_ASSIGN(DownstreamLoader); -}; - SpeechRecognitionEngine::Config::Config() : filter_profanities(false), continuous(true), @@ -562,7 +365,7 @@ SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) { auto downstream_request = std::make_unique<network::ResourceRequest>(); downstream_request->credentials_mode = network::mojom::CredentialsMode::kOmit; downstream_request->url = downstream_url; - downstream_loader_ = std::make_unique<DownstreamLoader>( + downstream_loader_ = std::make_unique<speech::DownstreamLoader>( std::move(downstream_request), downstream_traffic_annotation, shared_url_loader_factory_.get(), this); @@ -667,7 +470,7 @@ SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) { encoder_->GetMimeType()); } - upstream_loader_ = std::make_unique<UpstreamLoader>( + upstream_loader_ = std::make_unique<speech::UpstreamLoader>( std::move(upstream_request), upstream_traffic_annotation, shared_url_loader_factory_.get(), this); diff --git a/chromium/content/browser/speech/speech_recognition_engine.h b/chromium/content/browser/speech/speech_recognition_engine.h index 1f3501200e5..d1e99750312 100644 --- a/chromium/content/browser/speech/speech_recognition_engine.h +++ b/chromium/content/browser/speech/speech_recognition_engine.h @@ -14,6 +14,10 @@ #include "base/memory/ref_counted.h" #include "base/sequence_checker.h" #include "base/strings/string_piece.h" +#include "components/speech/downstream_loader.h" +#include "components/speech/downstream_loader_client.h" +#include "components/speech/upstream_loader.h" +#include "components/speech/upstream_loader_client.h" #include "content/browser/speech/audio_encoder.h" #include "content/browser/speech/chunked_byte_buffer.h" #include "content/common/content_export.h" @@ -59,7 +63,9 @@ struct SpeechRecognitionError; // EndRecognition. If a recognition was started, the caller can free the // SpeechRecognitionEngine only after calling EndRecognition. -class CONTENT_EXPORT SpeechRecognitionEngine { +class CONTENT_EXPORT SpeechRecognitionEngine + : public speech::UpstreamLoaderClient, + public speech::DownstreamLoaderClient { public: class Delegate { public: @@ -104,7 +110,7 @@ class CONTENT_EXPORT SpeechRecognitionEngine { SpeechRecognitionEngine( scoped_refptr<network::SharedURLLoaderFactory> shared_url_loader_factory, const std::string& accept_language); - ~SpeechRecognitionEngine(); + ~SpeechRecognitionEngine() override; // Sets the URL requests are sent to for tests. static void set_web_service_base_url_for_tests( @@ -119,8 +125,8 @@ class CONTENT_EXPORT SpeechRecognitionEngine { int GetDesiredAudioChunkDurationMs() const; private: - class UpstreamLoader; - class DownstreamLoader; + friend class speech::UpstreamLoaderClient; + friend class speech::DownstreamLoader; Delegate* delegate_; @@ -171,10 +177,12 @@ class CONTENT_EXPORT SpeechRecognitionEngine { DISALLOW_COPY_AND_ASSIGN(FSMEventArgs); }; - void OnUpstreamDataComplete(bool success, int response_code); + // speech::UpstreamLoaderClient + void OnUpstreamDataComplete(bool success, int response_code) override; - void OnDownstreamDataReceived(base::StringPiece new_response_data); - void OnDownstreamDataComplete(bool success, int response_code); + // speech::DownstreamLoaderClient + void OnDownstreamDataReceived(base::StringPiece new_response_data) override; + void OnDownstreamDataComplete(bool success, int response_code) override; // Entry point for pushing any new external event into the recognizer FSM. void DispatchEvent(const FSMEventArgs& event_args); @@ -204,8 +212,8 @@ class CONTENT_EXPORT SpeechRecognitionEngine { void UploadAudioChunk(const std::string& data, FrameType type, bool is_final); Config config_; - std::unique_ptr<UpstreamLoader> upstream_loader_; - std::unique_ptr<DownstreamLoader> downstream_loader_; + std::unique_ptr<speech::UpstreamLoader> upstream_loader_; + std::unique_ptr<speech::DownstreamLoader> downstream_loader_; scoped_refptr<network::SharedURLLoaderFactory> shared_url_loader_factory_; const std::string accept_language_; std::unique_ptr<AudioEncoder> encoder_; diff --git a/chromium/content/browser/speech/speech_recognition_engine_unittest.cc b/chromium/content/browser/speech/speech_recognition_engine_unittest.cc index 1312af4f6f6..a68c4165ff5 100644 --- a/chromium/content/browser/speech/speech_recognition_engine_unittest.cc +++ b/chromium/content/browser/speech/speech_recognition_engine_unittest.cc @@ -17,7 +17,7 @@ #include "base/sys_byteorder.h" #include "base/test/task_environment.h" #include "content/browser/speech/audio_buffer.h" -#include "content/browser/speech/proto/google_streaming_api.pb.h" +#include "content/public/browser/google_streaming_api.pb.h" #include "mojo/public/cpp/bindings/remote.h" #include "net/base/net_errors.h" #include "net/http/http_response_headers.h" diff --git a/chromium/content/browser/speech/speech_recognition_manager_impl.cc b/chromium/content/browser/speech/speech_recognition_manager_impl.cc index 9a55a7c6e80..4220ef86ec3 100644 --- a/chromium/content/browser/speech/speech_recognition_manager_impl.cc +++ b/chromium/content/browser/speech/speech_recognition_manager_impl.cc @@ -15,7 +15,6 @@ #include "base/memory/ref_counted_delete_on_sequence.h" #include "base/sequenced_task_runner.h" #include "base/single_thread_task_runner.h" -#include "base/task/post_task.h" #include "base/threading/thread_task_runner_handle.h" #include "build/build_config.h" #include "content/browser/browser_main_loop.h" @@ -196,10 +195,9 @@ void SpeechRecognitionManagerImpl::FrameDeletionObserver::ContentsObserver:: RenderFrameDeleted(RenderFrameHost* render_frame_host) { auto iters = observed_frames_.equal_range(render_frame_host); for (auto it = iters.first; it != iters.second; ++it) { - base::CreateSingleThreadTaskRunner({BrowserThread::IO}) - ->PostTask(FROM_HERE, - base::BindOnce(parent_observer_->frame_deleted_callback_, - it->second)); + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, + base::BindOnce(parent_observer_->frame_deleted_callback_, it->second)); } observed_frames_.erase(iters.first, iters.second); @@ -303,14 +301,13 @@ int SpeechRecognitionManagerImpl::CreateSession( // The deletion observer is owned by this class, so it's safe to use // Unretained. - base::CreateSingleThreadTaskRunner({BrowserThread::UI}) - ->PostTask( - FROM_HERE, - base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver:: - CreateObserverForSession, - base::Unretained(frame_deletion_observer_.get()), - config.initial_context.render_process_id, - config.initial_context.render_frame_id, session_id)); + GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, + base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver:: + CreateObserverForSession, + base::Unretained(frame_deletion_observer_.get()), + config.initial_context.render_process_id, + config.initial_context.render_frame_id, session_id)); return session_id; } @@ -413,15 +410,14 @@ void SpeechRecognitionManagerImpl::AbortSession(int session_id) { // The deletion observer is owned by this class, so it's safe to use // Unretained. - base::CreateSingleThreadTaskRunner({BrowserThread::UI}) - ->PostTask( - FROM_HERE, - base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver:: - RemoveObserverForSession, - base::Unretained(frame_deletion_observer_.get()), - iter->second->config.initial_context.render_process_id, - iter->second->config.initial_context.render_frame_id, - session_id)); + GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, + base::BindOnce(&SpeechRecognitionManagerImpl::FrameDeletionObserver:: + RemoveObserverForSession, + base::Unretained(frame_deletion_observer_.get()), + iter->second->config.initial_context.render_process_id, + iter->second->config.initial_context.render_frame_id, + session_id)); AbortSessionImpl(session_id); } diff --git a/chromium/content/browser/speech/speech_recognizer.h b/chromium/content/browser/speech/speech_recognizer.h index 64c896518a2..7a5f5204eee 100644 --- a/chromium/content/browser/speech/speech_recognizer.h +++ b/chromium/content/browser/speech/speech_recognizer.h @@ -5,7 +5,7 @@ #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ -#include "base/logging.h" +#include "base/check.h" #include "base/macros.h" #include "base/memory/ref_counted.h" #include "content/common/content_export.h" diff --git a/chromium/content/browser/speech/speech_recognizer_impl.cc b/chromium/content/browser/speech/speech_recognizer_impl.cc index b03a554f559..c17b704614b 100644 --- a/chromium/content/browser/speech/speech_recognizer_impl.cc +++ b/chromium/content/browser/speech/speech_recognizer_impl.cc @@ -11,7 +11,6 @@ #include "base/bind.h" #include "base/macros.h" #include "base/numerics/ranges.h" -#include "base/task/post_task.h" #include "base/time/time.h" #include "build/build_config.h" #include "content/browser/browser_main_loop.h" @@ -223,22 +222,22 @@ void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) { DCHECK(!device_id.empty()); device_id_ = device_id; - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, weak_ptr_factory_.GetWeakPtr(), FSMEventArgs(EVENT_PREPARE))); } void SpeechRecognizerImpl::AbortRecognition() { - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, weak_ptr_factory_.GetWeakPtr(), FSMEventArgs(EVENT_ABORT))); } void SpeechRecognizerImpl::StopAudioCapture() { - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, weak_ptr_factory_.GetWeakPtr(), FSMEventArgs(EVENT_STOP_CAPTURE))); } @@ -278,15 +277,15 @@ void SpeechRecognizerImpl::Capture(const AudioBus* data, // Convert audio from native format to fixed format used by WebSpeech. FSMEventArgs event_args(EVENT_AUDIO_DATA); event_args.audio_data = audio_converter_->Convert(data); - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, weak_ptr_factory_.GetWeakPtr(), event_args)); // See http://crbug.com/506051 regarding why one extra convert call can // sometimes be required. It should be a rare case. if (!audio_converter_->data_was_converted()) { event_args.audio_data = audio_converter_->Convert(data); - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, weak_ptr_factory_.GetWeakPtr(), event_args)); } // Something is seriously wrong here and we are most likely missing some @@ -296,8 +295,8 @@ void SpeechRecognizerImpl::Capture(const AudioBus* data, void SpeechRecognizerImpl::OnCaptureError(const std::string& message) { FSMEventArgs event_args(EVENT_AUDIO_ERROR); - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, weak_ptr_factory_.GetWeakPtr(), event_args)); } @@ -305,8 +304,8 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineResults( const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) { FSMEventArgs event_args(EVENT_ENGINE_RESULT); event_args.engine_results = mojo::Clone(results); - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, weak_ptr_factory_.GetWeakPtr(), event_args)); } @@ -319,8 +318,8 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( const blink::mojom::SpeechRecognitionError& error) { FSMEventArgs event_args(EVENT_ENGINE_ERROR); event_args.engine_error = error; - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImpl::DispatchEvent, weak_ptr_factory_.GetWeakPtr(), event_args)); } @@ -880,6 +879,7 @@ void SpeechRecognizerImpl::CreateAudioCapturerSource() { stream_factory.InitWithNewPipeAndPassReceiver()); audio_capturer_source_ = audio::CreateInputDevice( std::move(stream_factory), device_id_, + audio::DeadStreamDetection::kEnabled, MediaInternals::GetInstance()->CreateMojoAudioLog( media::AudioLogFactory::AUDIO_INPUT_CONTROLLER, 0 /* component_id */)); diff --git a/chromium/content/browser/speech/speech_recognizer_impl_android.cc b/chromium/content/browser/speech/speech_recognizer_impl_android.cc index c72dd8464c2..49ff8496d40 100644 --- a/chromium/content/browser/speech/speech_recognizer_impl_android.cc +++ b/chromium/content/browser/speech/speech_recognizer_impl_android.cc @@ -12,7 +12,6 @@ #include "base/android/scoped_java_ref.h" #include "base/bind.h" #include "base/strings/utf_string_conversions.h" -#include "base/task/post_task.h" #include "content/public/android/content_jni_headers/SpeechRecognitionImpl_jni.h" #include "content/public/browser/browser_task_traits.h" #include "content/public/browser/browser_thread.h" @@ -42,14 +41,14 @@ void SpeechRecognizerImplAndroid::StartRecognition( const std::string& device_id) { DCHECK_CURRENTLY_ON(BrowserThread::IO); // TODO(xians): Open the correct device for speech on Android. - base::PostTask( - FROM_HERE, {BrowserThread::IO}, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognitionEventListener::OnRecognitionStart, base::Unretained(listener()), session_id())); SpeechRecognitionSessionConfig config = SpeechRecognitionManager::GetInstance()->GetSessionConfig(session_id()); - base::PostTask( - FROM_HERE, {BrowserThread::UI}, + GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce( &content::SpeechRecognizerImplAndroid::StartRecognitionOnUIThread, this, config.language, config.continuous, config.interim_results)); @@ -71,8 +70,8 @@ void SpeechRecognizerImplAndroid::StartRecognitionOnUIThread( void SpeechRecognizerImplAndroid::AbortRecognition() { if (BrowserThread::CurrentlyOn(BrowserThread::IO)) { state_ = STATE_IDLE; - base::PostTask( - FROM_HERE, {BrowserThread::UI}, + GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&content::SpeechRecognizerImplAndroid::AbortRecognition, this)); return; @@ -85,8 +84,8 @@ void SpeechRecognizerImplAndroid::AbortRecognition() { void SpeechRecognizerImplAndroid::StopAudioCapture() { if (BrowserThread::CurrentlyOn(BrowserThread::IO)) { - base::PostTask( - FROM_HERE, {BrowserThread::UI}, + GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&content::SpeechRecognizerImplAndroid::StopAudioCapture, this)); return; @@ -111,8 +110,8 @@ void SpeechRecognizerImplAndroid::OnAudioStart( JNIEnv* env, const JavaParamRef<jobject>& obj) { if (BrowserThread::CurrentlyOn(BrowserThread::UI)) { - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioStart, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioStart, this, nullptr, nullptr)); return; } @@ -125,8 +124,8 @@ void SpeechRecognizerImplAndroid::OnSoundStart( JNIEnv* env, const JavaParamRef<jobject>& obj) { if (BrowserThread::CurrentlyOn(BrowserThread::UI)) { - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundStart, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundStart, this, nullptr, nullptr)); return; } @@ -137,8 +136,8 @@ void SpeechRecognizerImplAndroid::OnSoundStart( void SpeechRecognizerImplAndroid::OnSoundEnd(JNIEnv* env, const JavaParamRef<jobject>& obj) { if (BrowserThread::CurrentlyOn(BrowserThread::UI)) { - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundEnd, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnSoundEnd, this, nullptr, nullptr)); return; } @@ -149,8 +148,8 @@ void SpeechRecognizerImplAndroid::OnSoundEnd(JNIEnv* env, void SpeechRecognizerImplAndroid::OnAudioEnd(JNIEnv* env, const JavaParamRef<jobject>& obj) { if (BrowserThread::CurrentlyOn(BrowserThread::UI)) { - base::PostTask(FROM_HERE, {BrowserThread::IO}, - base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioEnd, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnAudioEnd, this, nullptr, nullptr)); return; } @@ -181,8 +180,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionResults( options[i], static_cast<double>(scores[i]))); } result->is_provisional = provisional; - base::PostTask( - FROM_HERE, {BrowserThread::IO}, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce( &SpeechRecognizerImplAndroid::OnRecognitionResultsOnIOThread, this, std::move(results))); @@ -199,8 +198,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionError( const JavaParamRef<jobject>& obj, jint error) { if (BrowserThread::CurrentlyOn(BrowserThread::UI)) { - base::PostTask( - FROM_HERE, {BrowserThread::IO}, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnRecognitionError, this, nullptr, nullptr, error)); return; @@ -217,8 +216,8 @@ void SpeechRecognizerImplAndroid::OnRecognitionEnd( JNIEnv* env, const JavaParamRef<jobject>& obj) { if (BrowserThread::CurrentlyOn(BrowserThread::UI)) { - base::PostTask( - FROM_HERE, {BrowserThread::IO}, + GetIOThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&SpeechRecognizerImplAndroid::OnRecognitionEnd, this, nullptr, nullptr)); return; diff --git a/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc b/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc index 7645df8014d..a8369b26471 100644 --- a/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc +++ b/chromium/content/browser/speech/speech_recognizer_impl_unittest.cc @@ -18,9 +18,9 @@ #include "base/test/scoped_feature_list.h" #include "base/threading/thread.h" #include "base/threading/thread_task_runner_handle.h" -#include "content/browser/speech/proto/google_streaming_api.pb.h" #include "content/browser/speech/speech_recognition_engine.h" #include "content/browser/speech/speech_recognizer_impl.h" +#include "content/public/browser/google_streaming_api.pb.h" #include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/common/content_features.h" #include "content/public/test/browser_task_environment.h" diff --git a/chromium/content/browser/speech/speech_synthesis_impl.cc b/chromium/content/browser/speech/speech_synthesis_impl.cc index 1ab5ad01656..e46103bcb66 100644 --- a/chromium/content/browser/speech/speech_synthesis_impl.cc +++ b/chromium/content/browser/speech/speech_synthesis_impl.cc @@ -4,6 +4,8 @@ #include "content/browser/speech/speech_synthesis_impl.h" +#include "content/browser/speech/tts_utterance_impl.h" + namespace content { namespace { @@ -85,9 +87,11 @@ void SendVoiceListToObserver( } // namespace -SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context) - : browser_context_(browser_context) { +SpeechSynthesisImpl::SpeechSynthesisImpl(BrowserContext* browser_context, + WebContents* web_contents) + : browser_context_(browser_context), web_contents_(web_contents) { DCHECK(browser_context_); + DCHECK(web_contents_); TtsController::GetInstance()->AddVoicesChangedDelegate(this); } @@ -120,8 +124,8 @@ void SpeechSynthesisImpl::AddVoiceListObserver( void SpeechSynthesisImpl::Speak( blink::mojom::SpeechSynthesisUtterancePtr utterance, mojo::PendingRemote<blink::mojom::SpeechSynthesisClient> client) { - std::unique_ptr<TtsUtterance> tts_utterance( - TtsUtterance::Create((browser_context_))); + std::unique_ptr<TtsUtterance> tts_utterance = + std::make_unique<TtsUtteranceImpl>(browser_context_, web_contents_); tts_utterance->SetText(utterance->text); tts_utterance->SetLang(utterance->lang); tts_utterance->SetVoiceName(utterance->voice); diff --git a/chromium/content/browser/speech/speech_synthesis_impl.h b/chromium/content/browser/speech/speech_synthesis_impl.h index 7db29e521cb..96cdacf46eb 100644 --- a/chromium/content/browser/speech/speech_synthesis_impl.h +++ b/chromium/content/browser/speech/speech_synthesis_impl.h @@ -12,6 +12,7 @@ namespace content { class BrowserContext; +class WebContents; // Back-end for the web speech synthesis API; dispatches speech requests to // content::TtsController and forwards voice lists and events back to the @@ -19,7 +20,8 @@ class BrowserContext; class SpeechSynthesisImpl : public blink::mojom::SpeechSynthesis, public VoicesChangedDelegate { public: - explicit SpeechSynthesisImpl(BrowserContext* browser_context); + SpeechSynthesisImpl(BrowserContext* browser_context, + WebContents* web_contents); ~SpeechSynthesisImpl() override; SpeechSynthesisImpl(const SpeechSynthesisImpl&) = delete; @@ -44,6 +46,8 @@ class SpeechSynthesisImpl : public blink::mojom::SpeechSynthesis, private: BrowserContext* browser_context_; + WebContents* web_contents_; + mojo::ReceiverSet<blink::mojom::SpeechSynthesis> receiver_set_; mojo::RemoteSet<blink::mojom::SpeechSynthesisVoiceListObserver> observer_set_; }; diff --git a/chromium/content/browser/speech/tts_controller_impl.cc b/chromium/content/browser/speech/tts_controller_impl.cc index 108a5a7d0a6..e34045ca4e1 100644 --- a/chromium/content/browser/speech/tts_controller_impl.cc +++ b/chromium/content/browser/speech/tts_controller_impl.cc @@ -6,6 +6,7 @@ #include <stddef.h> +#include <algorithm> #include <string> #include <vector> @@ -16,20 +17,47 @@ #include "base/metrics/user_metrics.h" #include "base/values.h" #include "build/build_config.h" +#include "content/browser/speech/tts_utterance_impl.h" #include "content/public/browser/content_browser_client.h" +#include "content/public/browser/visibility.h" +#include "content/public/browser/web_contents.h" #include "content/public/common/content_client.h" #include "services/data_decoder/public/cpp/safe_xml_parser.h" #include "services/data_decoder/public/mojom/xml_parser.mojom.h" #include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h" +#include "ui/base/l10n/l10n_util.h" -namespace content { +#if defined(OS_CHROMEOS) +#include "content/public/browser/tts_controller_delegate.h" +#endif +namespace content { +namespace { // A value to be used to indicate that there is no char index available. const int kInvalidCharIndex = -1; // A value to be used to indicate that there is no length available. const int kInvalidLength = -1; +#if defined(OS_CHROMEOS) +bool VoiceIdMatches( + const base::Optional<TtsControllerDelegate::PreferredVoiceId>& id, + const content::VoiceData& voice) { + if (!id.has_value() || voice.name.empty() || + (voice.engine_id.empty() && !voice.native)) + return false; + if (voice.native) + return id->name == voice.name && id->id.empty(); + return id->name == voice.name && id->id == voice.engine_id; +} +#endif // defined(OS_CHROMEOS) + +TtsUtteranceImpl* AsUtteranceImpl(TtsUtterance* utterance) { + return static_cast<TtsUtteranceImpl*>(utterance); +} + +} // namespace + // // VoiceData // @@ -77,16 +105,12 @@ TtsControllerImpl* TtsControllerImpl::GetInstance() { return base::Singleton<TtsControllerImpl>::get(); } -TtsControllerImpl::TtsControllerImpl() - : delegate_(nullptr), - current_utterance_(nullptr), - paused_(false), - tts_platform_(nullptr) {} +TtsControllerImpl::TtsControllerImpl() = default; TtsControllerImpl::~TtsControllerImpl() { if (current_utterance_) { current_utterance_->Finish(); - current_utterance_.reset(); + SetCurrentUtterance(nullptr); } // Clear any queued utterances too. @@ -95,17 +119,22 @@ TtsControllerImpl::~TtsControllerImpl() { void TtsControllerImpl::SpeakOrEnqueue( std::unique_ptr<TtsUtterance> utterance) { + if (!ShouldSpeakUtterance(utterance.get())) { + utterance->Finish(); + return; + } + // If we're paused and we get an utterance that can't be queued, // flush the queue but stay in the paused state. if (paused_ && !utterance->GetCanEnqueue()) { - utterance_deque_.emplace_back(std::move(utterance)); + utterance_list_.emplace_back(std::move(utterance)); Stop(); paused_ = true; return; } if (paused_ || (IsSpeaking() && utterance->GetCanEnqueue())) { - utterance_deque_.emplace_back(std::move(utterance)); + utterance_list_.emplace_back(std::move(utterance)); } else { Stop(); SpeakNow(std::move(utterance)); @@ -113,26 +142,30 @@ void TtsControllerImpl::SpeakOrEnqueue( } void TtsControllerImpl::Stop() { - StopInternal(GURL()); + StopAndClearQueue(GURL()); } void TtsControllerImpl::Stop(const GURL& source_url) { - StopInternal(source_url); + StopAndClearQueue(source_url); +} + +void TtsControllerImpl::StopAndClearQueue(const GURL& source_url) { + if (StopCurrentUtteranceIfMatches(source_url)) + ClearUtteranceQueue(true); } -void TtsControllerImpl::StopInternal(const GURL& source_url) { +bool TtsControllerImpl::StopCurrentUtteranceIfMatches(const GURL& source_url) { base::RecordAction(base::UserMetricsAction("TextToSpeech.Stop")); paused_ = false; if (!source_url.is_empty() && current_utterance_ && current_utterance_->GetSrcUrl().GetOrigin() != source_url.GetOrigin()) - return; + return false; if (current_utterance_ && !current_utterance_->GetEngineId().empty()) { - if (GetTtsControllerDelegate()->GetTtsEngineDelegate()) - GetTtsControllerDelegate()->GetTtsEngineDelegate()->Stop( - current_utterance_.get()); + if (engine_delegate_) + engine_delegate_->Stop(current_utterance_.get()); } else { GetTtsPlatform()->ClearError(); GetTtsPlatform()->StopSpeaking(); @@ -142,7 +175,7 @@ void TtsControllerImpl::StopInternal(const GURL& source_url) { current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, kInvalidLength, std::string()); FinishCurrentUtterance(); - ClearUtteranceQueue(true); // Send events. + return true; } void TtsControllerImpl::Pause() { @@ -150,9 +183,8 @@ void TtsControllerImpl::Pause() { paused_ = true; if (current_utterance_ && !current_utterance_->GetEngineId().empty()) { - if (GetTtsControllerDelegate()->GetTtsEngineDelegate()) - GetTtsControllerDelegate()->GetTtsEngineDelegate()->Pause( - current_utterance_.get()); + if (engine_delegate_) + engine_delegate_->Pause(current_utterance_.get()); } else if (current_utterance_) { GetTtsPlatform()->ClearError(); GetTtsPlatform()->Pause(); @@ -164,9 +196,8 @@ void TtsControllerImpl::Resume() { paused_ = false; if (current_utterance_ && !current_utterance_->GetEngineId().empty()) { - if (GetTtsControllerDelegate()->GetTtsEngineDelegate()) - GetTtsControllerDelegate()->GetTtsEngineDelegate()->Resume( - current_utterance_.get()); + if (engine_delegate_) + engine_delegate_->Resume(current_utterance_.get()); } else if (current_utterance_) { GetTtsPlatform()->ClearError(); GetTtsPlatform()->Resume(); @@ -245,11 +276,8 @@ void TtsControllerImpl::GetVoices(BrowserContext* browser_context, tts_platform->GetVoices(out_voices); } - if (browser_context) { - TtsControllerDelegate* delegate = GetTtsControllerDelegate(); - if (delegate && delegate->GetTtsEngineDelegate()) - delegate->GetTtsEngineDelegate()->GetVoices(browser_context, out_voices); - } + if (browser_context && engine_delegate_) + engine_delegate_->GetVoices(browser_context, out_voices); } bool TtsControllerImpl::IsSpeaking() { @@ -276,22 +304,21 @@ void TtsControllerImpl::RemoveVoicesChangedDelegate( void TtsControllerImpl::RemoveUtteranceEventDelegate( UtteranceEventDelegate* delegate) { // First clear any pending utterances with this delegate. - std::deque<std::unique_ptr<TtsUtterance>> old_deque; - utterance_deque_.swap(old_deque); - while (!old_deque.empty()) { - std::unique_ptr<TtsUtterance> utterance = std::move(old_deque.front()); - old_deque.pop_front(); + std::list<std::unique_ptr<TtsUtterance>> old_list; + utterance_list_.swap(old_list); + while (!old_list.empty()) { + std::unique_ptr<TtsUtterance> utterance = std::move(old_list.front()); + old_list.pop_front(); if (utterance->GetEventDelegate() != delegate) - utterance_deque_.emplace_back(std::move(utterance)); + utterance_list_.emplace_back(std::move(utterance)); } if (current_utterance_ && current_utterance_->GetEventDelegate() == delegate) { current_utterance_->SetEventDelegate(nullptr); if (!current_utterance_->GetEngineId().empty()) { - if (GetTtsControllerDelegate()->GetTtsEngineDelegate()) - GetTtsControllerDelegate()->GetTtsEngineDelegate()->Stop( - current_utterance_.get()); + if (engine_delegate_) + engine_delegate_->Stop(current_utterance_.get()); } else { GetTtsPlatform()->ClearError(); GetTtsPlatform()->StopSpeaking(); @@ -304,17 +331,11 @@ void TtsControllerImpl::RemoveUtteranceEventDelegate( } void TtsControllerImpl::SetTtsEngineDelegate(TtsEngineDelegate* delegate) { - if (!GetTtsControllerDelegate()) - return; - - GetTtsControllerDelegate()->SetTtsEngineDelegate(delegate); + engine_delegate_ = delegate; } TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() { - if (!GetTtsControllerDelegate()) - return nullptr; - - return GetTtsControllerDelegate()->GetTtsEngineDelegate(); + return engine_delegate_; } void TtsControllerImpl::OnBrowserContextDestroyed( @@ -322,7 +343,7 @@ void TtsControllerImpl::OnBrowserContextDestroyed( bool did_clear_utterances = false; // First clear the BrowserContext from any utterances. - for (std::unique_ptr<TtsUtterance>& utterance : utterance_deque_) { + for (std::unique_ptr<TtsUtterance>& utterance : utterance_list_) { if (utterance->GetBrowserContext() == browser_context) { utterance->ClearBrowserContext(); did_clear_utterances = true; @@ -342,7 +363,7 @@ void TtsControllerImpl::OnBrowserContextDestroyed( // safe to use base::Unretained because this is a singleton. if (did_clear_utterances) { base::ThreadTaskRunnerHandle::Get()->PostTask( - FROM_HERE, base::BindOnce(&TtsControllerImpl::StopInternal, + FROM_HERE, base::BindOnce(&TtsControllerImpl::StopAndClearQueue, base::Unretained(this), GURL())); } } @@ -352,7 +373,7 @@ void TtsControllerImpl::SetTtsPlatform(TtsPlatform* tts_platform) { } int TtsControllerImpl::QueueSize() { - return static_cast<int>(utterance_deque_.size()); + return static_cast<int>(utterance_list_.size()); } TtsPlatform* TtsControllerImpl::GetTtsPlatform() { @@ -362,15 +383,6 @@ TtsPlatform* TtsControllerImpl::GetTtsPlatform() { } void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) { - // Note: this would only happen if a content embedder failed to provide - // their own TtsControllerDelegate. Chrome provides one, and Content Shell - // provides a mock one for web tests. - if (!GetTtsControllerDelegate()) { - utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, - kInvalidLength, std::string()); - return; - } - // Get all available voices and try to find a matching voice. std::vector<VoiceData> voices; GetVoices(utterance->GetBrowserContext(), &voices); @@ -379,8 +391,7 @@ void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) { // to true because that might trigger deferred loading of native voices. // TODO(katie): Move most of the GetMatchingVoice logic into content/ and // use the TTS controller delegate to get chrome-specific info as needed. - int index = - GetTtsControllerDelegate()->GetMatchingVoice(utterance.get(), voices); + int index = GetMatchingVoice(utterance.get(), voices); VoiceData voice; if (index >= 0) voice = voices[index]; @@ -411,23 +422,22 @@ void TtsControllerImpl::SpeakNow(std::unique_ptr<TtsUtterance> utterance) { if (!voice.native) { #if !defined(OS_ANDROID) DCHECK(!voice.engine_id.empty()); - current_utterance_ = std::move(utterance); + SetCurrentUtterance(std::move(utterance)); current_utterance_->SetEngineId(voice.engine_id); - if (GetTtsControllerDelegate()->GetTtsEngineDelegate()) - GetTtsControllerDelegate()->GetTtsEngineDelegate()->Speak( - current_utterance_.get(), voice); + if (engine_delegate_) + engine_delegate_->Speak(current_utterance_.get(), voice); bool sends_end_event = voice.events.find(TTS_EVENT_END) != voice.events.end(); if (!sends_end_event) { current_utterance_->Finish(); - current_utterance_.reset(); + SetCurrentUtterance(nullptr); SpeakNextUtterance(); } -#endif +#endif // !defined(OS_ANDROID) } else { // It's possible for certain platforms to send start events immediately // during |speak|. - current_utterance_ = std::move(utterance); + SetCurrentUtterance(std::move(utterance)); GetTtsPlatform()->ClearError(); GetTtsPlatform()->Speak( current_utterance_->GetId(), current_utterance_->GetText(), @@ -451,20 +461,20 @@ void TtsControllerImpl::OnSpeakFinished(int utterance_id, bool success) { // the browser has built-in TTS that isn't loaded yet. if (GetTtsPlatform()->LoadBuiltInTtsEngine( current_utterance_->GetBrowserContext())) { - utterance_deque_.emplace_back(std::move(current_utterance_)); + utterance_list_.emplace_back(std::move(current_utterance_)); return; } current_utterance_->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, kInvalidLength, GetTtsPlatform()->GetError()); - current_utterance_.reset(); + SetCurrentUtterance(nullptr); } void TtsControllerImpl::ClearUtteranceQueue(bool send_events) { - while (!utterance_deque_.empty()) { + while (!utterance_list_.empty()) { std::unique_ptr<TtsUtterance> utterance = - std::move(utterance_deque_.front()); - utterance_deque_.pop_front(); + std::move(utterance_list_.front()); + utterance_list_.pop_front(); if (send_events) { utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, kInvalidLength, std::string()); @@ -479,7 +489,7 @@ void TtsControllerImpl::FinishCurrentUtterance() { if (!current_utterance_->IsFinished()) current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, kInvalidLength, std::string()); - current_utterance_.reset(); + SetCurrentUtterance(nullptr); } } @@ -489,11 +499,14 @@ void TtsControllerImpl::SpeakNextUtterance() { // Start speaking the next utterance in the queue. Keep trying in case // one fails but there are still more in the queue to try. - while (!utterance_deque_.empty() && !current_utterance_) { + while (!utterance_list_.empty() && !current_utterance_) { std::unique_ptr<TtsUtterance> utterance = - std::move(utterance_deque_.front()); - utterance_deque_.pop_front(); - SpeakNow(std::move(utterance)); + std::move(utterance_list_.front()); + utterance_list_.pop_front(); + if (ShouldSpeakUtterance(utterance.get())) + SpeakNow(std::move(utterance)); + else + utterance->Finish(); } } @@ -502,8 +515,9 @@ void TtsControllerImpl::UpdateUtteranceDefaults(TtsUtterance* utterance) { double pitch = utterance->GetContinuousParameters().pitch; double volume = utterance->GetContinuousParameters().volume; #if defined(OS_CHROMEOS) - GetTtsControllerDelegate()->UpdateUtteranceDefaultsFromPrefs(utterance, &rate, - &pitch, &volume); + if (GetTtsControllerDelegate()) + GetTtsControllerDelegate()->UpdateUtteranceDefaultsFromPrefs( + utterance, &rate, &pitch, &volume); #else // Update pitch, rate and volume to defaults if not explicity set on // this utterance. @@ -517,14 +531,8 @@ void TtsControllerImpl::UpdateUtteranceDefaults(TtsUtterance* utterance) { utterance->SetContinuousParameters(rate, pitch, volume); } -TtsControllerDelegate* TtsControllerImpl::GetTtsControllerDelegate() { - if (delegate_) - return delegate_; - if (GetContentClient() && GetContentClient()->browser()) { - delegate_ = GetContentClient()->browser()->GetTtsControllerDelegate(); - return delegate_; - } - return nullptr; +void TtsControllerImpl::SetStopSpeakingWhenHidden(bool value) { + stop_speaking_when_hidden_ = value; } void TtsControllerImpl::StripSSML( @@ -595,4 +603,175 @@ void TtsControllerImpl::PopulateParsedText(std::string* parsed_text, } } +int TtsControllerImpl::GetMatchingVoice(TtsUtterance* utterance, + const std::vector<VoiceData>& voices) { + const std::string app_lang = + GetContentClient()->browser()->GetApplicationLocale(); + // Start with a best score of -1, that way even if none of the criteria + // match, something will be returned if there are any voices. + int best_score = -1; + int best_score_index = -1; +#if defined(OS_CHROMEOS) + TtsControllerDelegate* delegate = GetTtsControllerDelegate(); + std::unique_ptr<TtsControllerDelegate::PreferredVoiceIds> preferred_ids = + delegate ? delegate->GetPreferredVoiceIdsForUtterance(utterance) + : nullptr; +#endif // defined(OS_CHROMEOS) + for (size_t i = 0; i < voices.size(); ++i) { + const content::VoiceData& voice = voices[i]; + int score = 0; + + // If the extension ID is specified, check for an exact match. + if (!utterance->GetEngineId().empty() && + utterance->GetEngineId() != voice.engine_id) + continue; + + // If the voice name is specified, check for an exact match. + if (!utterance->GetVoiceName().empty() && + voice.name != utterance->GetVoiceName()) + continue; + + // Prefer the utterance language. + if (!voice.lang.empty() && !utterance->GetLang().empty()) { + // An exact language match is worth more than a partial match. + if (voice.lang == utterance->GetLang()) { + score += 128; + } else if (l10n_util::GetLanguage(voice.lang) == + l10n_util::GetLanguage(utterance->GetLang())) { + score += 64; + } + } + + // Next, prefer required event types. + if (!utterance->GetRequiredEventTypes().empty()) { + bool has_all_required_event_types = true; + for (TtsEventType event_type : utterance->GetRequiredEventTypes()) { + if (voice.events.find(event_type) == voice.events.end()) { + has_all_required_event_types = false; + break; + } + } + if (has_all_required_event_types) + score += 32; + } + +#if defined(OS_CHROMEOS) + if (preferred_ids) { + // First prefer the user's preference voice for the utterance language, + // if the utterance language is specified. + if (!utterance->GetLang().empty() && + VoiceIdMatches(preferred_ids->lang_voice_id, voice)) { + score += 16; + } + + // Then prefer the user's preference voice for the system language. + // This is a lower priority match than the utterance voice. + if (VoiceIdMatches(preferred_ids->locale_voice_id, voice)) + score += 8; + + // Finally, prefer the user's preference voice for any language. This will + // pick the default voice if there is no better match for the current + // system language and utterance language. + if (VoiceIdMatches(preferred_ids->any_locale_voice_id, voice)) + score += 4; + } +#endif // defined(OS_CHROMEOS) + + // Finally, prefer system language. + if (!voice.lang.empty()) { + if (voice.lang == app_lang) { + score += 2; + } else if (l10n_util::GetLanguage(voice.lang) == + l10n_util::GetLanguage(app_lang)) { + score += 1; + } + } + + if (score > best_score) { + best_score = score; + best_score_index = i; + } + } + + return best_score_index; +} + +void TtsControllerImpl::SetCurrentUtterance( + std::unique_ptr<TtsUtterance> utterance) { + current_utterance_ = std::move(utterance); + Observe(current_utterance_ + ? AsUtteranceImpl(current_utterance_.get())->web_contents() + : nullptr); +} + +void TtsControllerImpl::StopCurrentUtteranceAndRemoveUtterancesMatching( + WebContents* wc) { + DCHECK(wc); + // Removes any utterances that match the WebContents from the current + // utterance (which our inherited WebContentsObserver starts observing every + // time the utterance changes). + // + // This is called when the WebContents for the current utterance is destroyed + // or hidden. In the case where it's destroyed, this is done to avoid + // attempting to start a utterance that is very likely to be destroyed right + // away, and there are also subtle timing issues if we didn't do this (if a + // queued utterance has already received WebContentsDestroyed(), and we start + // it, we won't get the corresponding WebContentsDestroyed()). + auto eraser = [wc](const std::unique_ptr<TtsUtterance>& utterance) { + TtsUtteranceImpl* utterance_impl = AsUtteranceImpl(utterance.get()); + if (utterance_impl->web_contents() == wc) { + utterance_impl->Finish(); + return true; + } + return false; + }; + utterance_list_.erase( + std::remove_if(utterance_list_.begin(), utterance_list_.end(), eraser), + utterance_list_.end()); + const bool stopped = StopCurrentUtteranceIfMatches(GURL()); + DCHECK(stopped); + SpeakNextUtterance(); +} + +bool TtsControllerImpl::ShouldSpeakUtterance(TtsUtterance* utterance) { + TtsUtteranceImpl* utterance_impl = AsUtteranceImpl(utterance); + if (!utterance_impl->was_created_with_web_contents()) + return true; + + // If the WebContents that created the utterance has been destroyed, don't + // speak it. + if (!utterance_impl->web_contents()) + return false; + + // Allow speaking if either the WebContents is visible, or the WebContents + // isn't required to be visible before speaking. + return !stop_speaking_when_hidden_ || + utterance_impl->web_contents()->GetVisibility() != Visibility::HIDDEN; +} + +// +// WebContentsObserver +// + +void TtsControllerImpl::WebContentsDestroyed() { + StopCurrentUtteranceAndRemoveUtterancesMatching(web_contents()); +} + +void TtsControllerImpl::OnVisibilityChanged(Visibility visibility) { + if (visibility == Visibility::HIDDEN && stop_speaking_when_hidden_) + StopCurrentUtteranceAndRemoveUtterancesMatching(web_contents()); +} + +#if defined(OS_CHROMEOS) +TtsControllerDelegate* TtsControllerImpl::GetTtsControllerDelegate() { + if (delegate_) + return delegate_; + if (GetContentClient() && GetContentClient()->browser()) { + delegate_ = GetContentClient()->browser()->GetTtsControllerDelegate(); + return delegate_; + } + return nullptr; +} +#endif // defined(OS_CHROMEOS) + } // namespace content diff --git a/chromium/content/browser/speech/tts_controller_impl.h b/chromium/content/browser/speech/tts_controller_impl.h index 052a8841be9..638c3691d6d 100644 --- a/chromium/content/browser/speech/tts_controller_impl.h +++ b/chromium/content/browser/speech/tts_controller_impl.h @@ -5,9 +5,8 @@ #ifndef CONTENT_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_ #define CONTENT_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_ -#include <deque> +#include <list> #include <memory> -#include <set> #include <string> #include <vector> @@ -22,18 +21,23 @@ #include "build/build_config.h" #include "content/common/content_export.h" #include "content/public/browser/tts_controller.h" -#include "content/public/browser/tts_controller_delegate.h" #include "content/public/browser/tts_platform.h" +#include "content/public/browser/web_contents_observer.h" #include "services/data_decoder/public/cpp/data_decoder.h" #include "url/gurl.h" namespace content { class BrowserContext; +#if defined(OS_CHROMEOS) +class TtsControllerDelegate; +#endif + // Singleton class that manages text-to-speech for all TTS engines and // APIs, maintaining a queue of pending utterances and keeping // track of all state. -class CONTENT_EXPORT TtsControllerImpl : public TtsController { +class CONTENT_EXPORT TtsControllerImpl : public TtsController, + public WebContentsObserver { public: // Get the single instance of this class. static TtsControllerImpl* GetInstance(); @@ -58,6 +62,7 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController { void RemoveUtteranceEventDelegate(UtteranceEventDelegate* delegate) override; void SetTtsEngineDelegate(TtsEngineDelegate* delegate) override; TtsEngineDelegate* GetTtsEngineDelegate() override; + void SetStopSpeakingWhenHidden(bool value) override; // Called directly by ~BrowserContext, because a raw BrowserContext pointer // is stored in an Utterance. @@ -77,6 +82,7 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController { ~TtsControllerImpl() override; private: + friend class TtsControllerTestHelper; FRIEND_TEST_ALL_PREFIXES(TtsControllerTest, TestTtsControllerShutdown); FRIEND_TEST_ALL_PREFIXES(TtsControllerTest, TestGetMatchingVoice); FRIEND_TEST_ALL_PREFIXES(TtsControllerTest, @@ -92,7 +98,13 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController { // |utterance| or delete it if there's an error. Returns true on success. void SpeakNow(std::unique_ptr<TtsUtterance> utterance); - void StopInternal(const GURL& source_url); + // If the current utterance matches |source_url|, it is stopped and the + // utterance queue cleared. + void StopAndClearQueue(const GURL& source_url); + + // Stops the current utterance if it matches |source_url|. Returns true on + // success, false if the current utterance does not match |source_url|. + bool StopCurrentUtteranceIfMatches(const GURL& source_url); // Clear the utterance queue. If send_events is true, will send // TTS_EVENT_CANCELLED events on each one. @@ -120,9 +132,31 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController { static void PopulateParsedText(std::string* parsed_text, const base::Value* element); + int GetMatchingVoice(TtsUtterance* utterance, + const std::vector<VoiceData>& voices); + + // Called internally to set |current_utterance_|. + void SetCurrentUtterance(std::unique_ptr<TtsUtterance> utterance); + + // Used when the WebContents of the current utterance is destroyed/hidden. + void StopCurrentUtteranceAndRemoveUtterancesMatching(WebContents* wc); + + // Returns true if the utterance should be spoken. + bool ShouldSpeakUtterance(TtsUtterance* utterance); + + // WebContentsObserver methods + void WebContentsDestroyed() override; + void OnVisibilityChanged(Visibility visibility) override; + +#if defined(OS_CHROMEOS) TtsControllerDelegate* GetTtsControllerDelegate(); - TtsControllerDelegate* delegate_; + TtsControllerDelegate* delegate_ = nullptr; +#endif + + TtsEngineDelegate* engine_delegate_ = nullptr; + + bool stop_speaking_when_hidden_ = false; // A set of delegates that want to be notified when the voices change. base::ObserverList<VoicesChangedDelegate> voices_changed_delegates_; @@ -131,14 +165,14 @@ class CONTENT_EXPORT TtsControllerImpl : public TtsController { std::unique_ptr<TtsUtterance> current_utterance_; // Whether the queue is paused or not. - bool paused_; + bool paused_ = false; // A pointer to the platform implementation of text-to-speech, for // dependency injection. - TtsPlatform* tts_platform_; + TtsPlatform* tts_platform_ = nullptr; // A queue of utterances to speak after the current one finishes. - std::deque<std::unique_ptr<TtsUtterance>> utterance_deque_; + std::list<std::unique_ptr<TtsUtterance>> utterance_list_; DISALLOW_COPY_AND_ASSIGN(TtsControllerImpl); }; diff --git a/chromium/content/browser/speech/tts_controller_unittest.cc b/chromium/content/browser/speech/tts_controller_unittest.cc index 2282f8db739..593814ad203 100644 --- a/chromium/content/browser/speech/tts_controller_unittest.cc +++ b/chromium/content/browser/speech/tts_controller_unittest.cc @@ -4,25 +4,39 @@ // Unit tests for the TTS Controller. +#include "content/browser/speech/tts_controller_impl.h" + #include "base/memory/ptr_util.h" #include "base/values.h" -#include "content/browser/speech/tts_controller_impl.h" -#include "content/public/browser/tts_controller_delegate.h" +#include "content/browser/speech/tts_utterance_impl.h" #include "content/public/browser/tts_platform.h" +#include "content/public/browser/visibility.h" #include "content/public/test/browser_task_environment.h" #include "content/public/test/test_browser_context.h" +#include "content/public/test/test_renderer_host.h" +#include "content/test/test_content_browser_client.h" +#include "content/test/test_web_contents.h" #include "testing/gtest/include/gtest/gtest.h" #include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h" -namespace content { +#if defined(OS_CHROMEOS) +#include "content/public/browser/tts_controller_delegate.h" +#endif -class TtsControllerTest : public testing::Test {}; +namespace content { // Platform Tts implementation that does nothing. class MockTtsPlatformImpl : public TtsPlatform { public: - MockTtsPlatformImpl() {} - virtual ~MockTtsPlatformImpl() {} + MockTtsPlatformImpl() = default; + virtual ~MockTtsPlatformImpl() = default; + + void set_voices(const std::vector<VoiceData>& voices) { voices_ = voices; } + + void set_run_speak_callback(bool value) { run_speak_callback_ = value; } + void set_is_speaking(bool value) { is_speaking_ = value; } + + // TtsPlatform: bool PlatformImplAvailable() override { return true; } void Speak(int utterance_id, const std::string& utterance, @@ -30,13 +44,16 @@ class MockTtsPlatformImpl : public TtsPlatform { const VoiceData& voice, const UtteranceContinuousParameters& params, base::OnceCallback<void(bool)> on_speak_finished) override { - std::move(on_speak_finished).Run(true); + if (run_speak_callback_) + std::move(on_speak_finished).Run(true); } - bool IsSpeaking() override { return false; } + bool IsSpeaking() override { return is_speaking_; } bool StopSpeaking() override { return true; } void Pause() override {} void Resume() override {} - void GetVoices(std::vector<VoiceData>* out_voices) override {} + void GetVoices(std::vector<VoiceData>* out_voices) override { + *out_voices = voices_; + } bool LoadBuiltInTtsEngine(BrowserContext* browser_context) override { return false; } @@ -45,12 +62,20 @@ class MockTtsPlatformImpl : public TtsPlatform { void SetError(const std::string& error) override {} std::string GetError() override { return std::string(); } void ClearError() override {} + + private: + std::vector<VoiceData> voices_; + bool run_speak_callback_ = true; + bool is_speaking_ = false; }; +#if defined(OS_CHROMEOS) class MockTtsControllerDelegate : public TtsControllerDelegate { public: - MockTtsControllerDelegate() {} - ~MockTtsControllerDelegate() override {} + MockTtsControllerDelegate() = default; + ~MockTtsControllerDelegate() override = default; + + void SetPreferredVoiceIds(const PreferredVoiceIds& ids) { ids_ = ids; } BrowserContext* GetLastBrowserContext() { BrowserContext* result = last_browser_context_; @@ -58,10 +83,12 @@ class MockTtsControllerDelegate : public TtsControllerDelegate { return result; } - int GetMatchingVoice(content::TtsUtterance* utterance, - std::vector<content::VoiceData>& voices) override { + // TtsControllerDelegate: + std::unique_ptr<PreferredVoiceIds> GetPreferredVoiceIdsForUtterance( + TtsUtterance* utterance) override { last_browser_context_ = utterance->GetBrowserContext(); - return -1; + auto ids = std::make_unique<PreferredVoiceIds>(ids_); + return ids; } void UpdateUtteranceDefaultsFromPrefs(content::TtsUtterance* utterance, @@ -69,15 +96,11 @@ class MockTtsControllerDelegate : public TtsControllerDelegate { double* pitch, double* volume) override {} - void SetTtsEngineDelegate(content::TtsEngineDelegate* delegate) override {} - - content::TtsEngineDelegate* GetTtsEngineDelegate() override { - return nullptr; - } - private: BrowserContext* last_browser_context_ = nullptr; + PreferredVoiceIds ids_; }; +#endif // Subclass of TtsController with a public ctor and dtor. class TtsControllerForTesting : public TtsControllerImpl { @@ -86,11 +109,14 @@ class TtsControllerForTesting : public TtsControllerImpl { ~TtsControllerForTesting() override {} }; -TEST_F(TtsControllerTest, TestTtsControllerShutdown) { +TEST(TtsControllerTest, TestTtsControllerShutdown) { MockTtsPlatformImpl platform_impl; - TtsControllerForTesting* controller = new TtsControllerForTesting(); - MockTtsControllerDelegate* delegate = new MockTtsControllerDelegate(); - controller->delegate_ = delegate; + std::unique_ptr<TtsControllerForTesting> controller = + std::make_unique<TtsControllerForTesting>(); +#if defined(OS_CHROMEOS) + MockTtsControllerDelegate delegate; + controller->delegate_ = &delegate; +#endif controller->SetTtsPlatform(&platform_impl); @@ -106,13 +132,11 @@ TEST_F(TtsControllerTest, TestTtsControllerShutdown) { // Make sure that deleting the controller when there are pending // utterances doesn't cause a crash. - delete controller; - - // Clean up. - delete delegate; + controller.reset(); } -TEST_F(TtsControllerTest, TestBrowserContextRemoved) { +#if defined(OS_CHROMEOS) +TEST(TtsControllerTest, TestBrowserContextRemoved) { // Create a controller, mock other stuff, and create a test // browser context. TtsControllerImpl* controller = TtsControllerImpl::GetInstance(); @@ -123,9 +147,17 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) { content::BrowserTaskEnvironment task_environment; auto browser_context = std::make_unique<TestBrowserContext>(); + std::vector<VoiceData> voices; + VoiceData voice_data; + voice_data.engine_id = "x"; + voice_data.events.insert(TTS_EVENT_END); + voices.push_back(voice_data); + platform_impl.set_voices(voices); + // Speak an utterances associated with this test browser context. std::unique_ptr<TtsUtterance> utterance1 = TtsUtterance::Create(browser_context.get()); + utterance1->SetEngineId("x"); utterance1->SetCanEnqueue(true); utterance1->SetSrcId(1); controller->SpeakOrEnqueue(std::move(utterance1)); @@ -137,6 +169,7 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) { // this browser context. std::unique_ptr<TtsUtterance> utterance2 = TtsUtterance::Create(browser_context.get()); + utterance2->SetEngineId("x"); utterance2->SetCanEnqueue(true); utterance2->SetSrcId(2); controller->SpeakOrEnqueue(std::move(utterance2)); @@ -150,9 +183,8 @@ TEST_F(TtsControllerTest, TestBrowserContextRemoved) { controller->SpeakNextUtterance(); ASSERT_EQ(nullptr, delegate.GetLastBrowserContext()); } - -#if !defined(OS_CHROMEOS) -TEST_F(TtsControllerTest, TestTtsControllerUtteranceDefaults) { +#else +TEST(TtsControllerTest, TestTtsControllerUtteranceDefaults) { std::unique_ptr<TtsControllerForTesting> controller = std::make_unique<TtsControllerForTesting>(); @@ -175,6 +207,337 @@ TEST_F(TtsControllerTest, TestTtsControllerUtteranceDefaults) { EXPECT_EQ(blink::mojom::kSpeechSynthesisDefaultVolume, utterance1->GetContinuousParameters().volume); } -#endif // !defined(OS_CHROMEOS) +#endif + +TEST(TtsControllerTest, TestGetMatchingVoice) { + std::unique_ptr<TtsControllerForTesting> controller = + std::make_unique<TtsControllerForTesting>(); +#if defined(OS_CHROMEOS) + MockTtsControllerDelegate delegate; + controller->delegate_ = &delegate; +#endif + + TestContentBrowserClient::GetInstance()->set_application_locale("en"); + + { + // Calling GetMatchingVoice with no voices returns -1. + std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr)); + std::vector<VoiceData> voices; + EXPECT_EQ(-1, controller->GetMatchingVoice(utterance.get(), voices)); + } + + { + // Calling GetMatchingVoice with any voices returns the first one + // even if there are no criteria that match. + std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr)); + std::vector<VoiceData> voices(2); + EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices)); + } + + { + // If nothing else matches, the English voice is returned. + // (In tests the language will always be English.) + std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr)); + std::vector<VoiceData> voices; + VoiceData fr_voice; + fr_voice.lang = "fr"; + voices.push_back(fr_voice); + VoiceData en_voice; + en_voice.lang = "en"; + voices.push_back(en_voice); + VoiceData de_voice; + de_voice.lang = "de"; + voices.push_back(de_voice); + EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices)); + } + + { + // Check precedence of various matching criteria. + std::vector<VoiceData> voices; + VoiceData voice0; + voices.push_back(voice0); + VoiceData voice1; + voice1.events.insert(TTS_EVENT_WORD); + voices.push_back(voice1); + VoiceData voice2; + voice2.lang = "de-DE"; + voices.push_back(voice2); + VoiceData voice3; + voice3.lang = "fr-CA"; + voices.push_back(voice3); + VoiceData voice4; + voice4.name = "Voice4"; + voices.push_back(voice4); + VoiceData voice5; + voice5.engine_id = "id5"; + voices.push_back(voice5); + VoiceData voice6; + voice6.engine_id = "id7"; + voice6.name = "Voice6"; + voice6.lang = "es-es"; + voices.push_back(voice6); + VoiceData voice7; + voice7.engine_id = "id7"; + voice7.name = "Voice7"; + voice7.lang = "es-mx"; + voices.push_back(voice7); + VoiceData voice8; + voice8.engine_id = ""; + voice8.name = "Android"; + voice8.lang = ""; + voice8.native = true; + voices.push_back(voice8); + + std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr)); + EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices)); + + std::set<TtsEventType> types; + types.insert(TTS_EVENT_WORD); + utterance->SetRequiredEventTypes(types); + EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices)); + + utterance->SetLang("de-DE"); + EXPECT_EQ(2, controller->GetMatchingVoice(utterance.get(), voices)); + + utterance->SetLang("fr-FR"); + EXPECT_EQ(3, controller->GetMatchingVoice(utterance.get(), voices)); + + utterance->SetVoiceName("Voice4"); + EXPECT_EQ(4, controller->GetMatchingVoice(utterance.get(), voices)); + + utterance->SetVoiceName(""); + utterance->SetEngineId("id5"); + EXPECT_EQ(5, controller->GetMatchingVoice(utterance.get(), voices)); + +#if defined(OS_CHROMEOS) + TtsControllerDelegate::PreferredVoiceIds preferred_voice_ids; + preferred_voice_ids.locale_voice_id.emplace("Voice7", "id7"); + preferred_voice_ids.any_locale_voice_id.emplace("Android", ""); + delegate.SetPreferredVoiceIds(preferred_voice_ids); + + // Voice6 is matched when the utterance locale exactly matches its locale. + utterance->SetEngineId(""); + utterance->SetLang("es-es"); + EXPECT_EQ(6, controller->GetMatchingVoice(utterance.get(), voices)); + + // The 7th voice is the default for "es", even though the utterance is + // "es-ar". |voice6| is not matched because it is not the default. + utterance->SetEngineId(""); + utterance->SetLang("es-ar"); + EXPECT_EQ(7, controller->GetMatchingVoice(utterance.get(), voices)); + + // The 8th voice is like the built-in "Android" voice, it has no lang + // and no extension ID. Make sure it can still be matched. + preferred_voice_ids.locale_voice_id.reset(); + delegate.SetPreferredVoiceIds(preferred_voice_ids); + utterance->SetVoiceName("Android"); + utterance->SetEngineId(""); + utterance->SetLang(""); + EXPECT_EQ(8, controller->GetMatchingVoice(utterance.get(), voices)); + + delegate.SetPreferredVoiceIds({}); +#endif + } + + { + // Check voices against system language. + std::vector<VoiceData> voices; + VoiceData voice0; + voice0.engine_id = "id0"; + voice0.name = "voice0"; + voice0.lang = "en-GB"; + voices.push_back(voice0); + VoiceData voice1; + voice1.engine_id = "id1"; + voice1.name = "voice1"; + voice1.lang = "en-US"; + voices.push_back(voice1); + std::unique_ptr<TtsUtterance> utterance(TtsUtterance::Create(nullptr)); + + // voice1 is matched against the exact default system language. + TestContentBrowserClient::GetInstance()->set_application_locale("en-US"); + utterance->SetLang(""); + EXPECT_EQ(1, controller->GetMatchingVoice(utterance.get(), voices)); + +#if defined(OS_CHROMEOS) + // voice0 is matched against the system language which has no region piece. + TestContentBrowserClient::GetInstance()->set_application_locale("en"); + EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices)); + + TtsControllerDelegate::PreferredVoiceIds preferred_voice_ids2; + preferred_voice_ids2.locale_voice_id.emplace("voice0", "id0"); + delegate.SetPreferredVoiceIds(preferred_voice_ids2); + // voice0 is matched against the pref over the system language. + TestContentBrowserClient::GetInstance()->set_application_locale("en-US"); + EXPECT_EQ(0, controller->GetMatchingVoice(utterance.get(), voices)); +#endif + } +} + +class TtsControllerTestHelper { + public: + TtsControllerTestHelper() { + controller_.SetTtsPlatform(&platform_impl_); + // This ensures utterances don't immediately complete. + platform_impl_.set_run_speak_callback(false); + platform_impl_.set_is_speaking(true); + } + + std::unique_ptr<TestWebContents> CreateWebContents() { + return std::unique_ptr<TestWebContents>( + TestWebContents::Create(&browser_context_, nullptr)); + } + + std::unique_ptr<TtsUtteranceImpl> CreateUtterance(WebContents* web_contents) { + return std::make_unique<TtsUtteranceImpl>(&browser_context_, web_contents); + } + + MockTtsPlatformImpl* platform_impl() { return &platform_impl_; } + + TtsControllerForTesting* controller() { return &controller_; } + + TtsUtterance* TtsControllerCurrentUtterance() { + return controller_.current_utterance_.get(); + } + + bool IsUtteranceListEmpty() { return controller_.utterance_list_.empty(); } + + private: + content::BrowserTaskEnvironment task_environment_; + RenderViewHostTestEnabler rvh_enabler_; + TestBrowserContext browser_context_; + MockTtsPlatformImpl platform_impl_; + TtsControllerForTesting controller_; +}; + +TEST(TtsControllerTest, StopsWhenWebContentsDestroyed) { + TtsControllerTestHelper helper; + std::unique_ptr<WebContents> web_contents = helper.CreateWebContents(); + std::unique_ptr<TtsUtteranceImpl> utterance = + helper.CreateUtterance(web_contents.get()); + + helper.controller()->SpeakOrEnqueue(std::move(utterance)); + EXPECT_TRUE(helper.controller()->IsSpeaking()); + EXPECT_TRUE(helper.TtsControllerCurrentUtterance()); + + web_contents.reset(); + // Destroying the WebContents should reset + // |TtsController::current_utterance_|. + EXPECT_FALSE(helper.TtsControllerCurrentUtterance()); +} + +TEST(TtsControllerTest, StartsQueuedUtteranceWhenWebContentsDestroyed) { + TtsControllerTestHelper helper; + std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents(); + std::unique_ptr<WebContents> web_contents2 = helper.CreateWebContents(); + std::unique_ptr<TtsUtteranceImpl> utterance1 = + helper.CreateUtterance(web_contents1.get()); + void* raw_utterance1 = utterance1.get(); + std::unique_ptr<TtsUtteranceImpl> utterance2 = + helper.CreateUtterance(web_contents2.get()); + utterance2->SetCanEnqueue(true); + void* raw_utterance2 = utterance2.get(); + + helper.controller()->SpeakOrEnqueue(std::move(utterance1)); + EXPECT_TRUE(helper.controller()->IsSpeaking()); + EXPECT_TRUE(helper.TtsControllerCurrentUtterance()); + helper.controller()->SpeakOrEnqueue(std::move(utterance2)); + EXPECT_EQ(raw_utterance1, helper.TtsControllerCurrentUtterance()); + + web_contents1.reset(); + // Destroying |web_contents1| should delete |utterance1| and start + // |utterance2|. + EXPECT_TRUE(helper.TtsControllerCurrentUtterance()); + EXPECT_EQ(raw_utterance2, helper.TtsControllerCurrentUtterance()); +} + +TEST(TtsControllerTest, StartsQueuedUtteranceWhenWebContentsDestroyed2) { + TtsControllerTestHelper helper; + std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents(); + std::unique_ptr<WebContents> web_contents2 = helper.CreateWebContents(); + std::unique_ptr<TtsUtteranceImpl> utterance1 = + helper.CreateUtterance(web_contents1.get()); + void* raw_utterance1 = utterance1.get(); + std::unique_ptr<TtsUtteranceImpl> utterance2 = + helper.CreateUtterance(web_contents1.get()); + std::unique_ptr<TtsUtteranceImpl> utterance3 = + helper.CreateUtterance(web_contents2.get()); + void* raw_utterance3 = utterance3.get(); + utterance2->SetCanEnqueue(true); + utterance3->SetCanEnqueue(true); + + helper.controller()->SpeakOrEnqueue(std::move(utterance1)); + helper.controller()->SpeakOrEnqueue(std::move(utterance2)); + helper.controller()->SpeakOrEnqueue(std::move(utterance3)); + EXPECT_TRUE(helper.controller()->IsSpeaking()); + EXPECT_EQ(raw_utterance1, helper.TtsControllerCurrentUtterance()); + + web_contents1.reset(); + // Deleting |web_contents1| should delete |utterance1| and |utterance2| as + // they are both from |web_contents1|. |raw_utterance3| should be made the + // current as it's from a different WebContents. + EXPECT_EQ(raw_utterance3, helper.TtsControllerCurrentUtterance()); + EXPECT_TRUE(helper.IsUtteranceListEmpty()); + + web_contents2.reset(); + // Deleting |web_contents2| should delete |utterance3| as it's from a + // different WebContents. + EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance()); +} + +TEST(TtsControllerTest, StartsUtteranceWhenWebContentsHidden) { + TtsControllerTestHelper helper; + std::unique_ptr<TestWebContents> web_contents = helper.CreateWebContents(); + web_contents->SetVisibilityAndNotifyObservers(Visibility::HIDDEN); + std::unique_ptr<TtsUtteranceImpl> utterance = + helper.CreateUtterance(web_contents.get()); + helper.controller()->SpeakOrEnqueue(std::move(utterance)); + EXPECT_TRUE(helper.controller()->IsSpeaking()); +} + +TEST(TtsControllerTest, + DoesNotStartUtteranceWhenWebContentsHiddenAndStopSpeakingWhenHiddenSet) { + TtsControllerTestHelper helper; + std::unique_ptr<TestWebContents> web_contents = helper.CreateWebContents(); + web_contents->SetVisibilityAndNotifyObservers(Visibility::HIDDEN); + std::unique_ptr<TtsUtteranceImpl> utterance = + helper.CreateUtterance(web_contents.get()); + helper.controller()->SetStopSpeakingWhenHidden(true); + helper.controller()->SpeakOrEnqueue(std::move(utterance)); + EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance()); + EXPECT_TRUE(helper.IsUtteranceListEmpty()); +} + +TEST(TtsControllerTest, SkipsQueuedUtteranceFromHiddenWebContents) { + TtsControllerTestHelper helper; + helper.controller()->SetStopSpeakingWhenHidden(true); + std::unique_ptr<WebContents> web_contents1 = helper.CreateWebContents(); + std::unique_ptr<TestWebContents> web_contents2 = helper.CreateWebContents(); + std::unique_ptr<TtsUtteranceImpl> utterance1 = + helper.CreateUtterance(web_contents1.get()); + const int utterance1_id = utterance1->GetId(); + std::unique_ptr<TtsUtteranceImpl> utterance2 = + helper.CreateUtterance(web_contents2.get()); + utterance2->SetCanEnqueue(true); + + helper.controller()->SpeakOrEnqueue(std::move(utterance1)); + EXPECT_TRUE(helper.TtsControllerCurrentUtterance()); + EXPECT_TRUE(helper.IsUtteranceListEmpty()); + + // Speak |utterance2|, which should get queued. + helper.controller()->SpeakOrEnqueue(std::move(utterance2)); + EXPECT_FALSE(helper.IsUtteranceListEmpty()); + + // Make the second WebContents hidden, this shouldn't change anything in + // TtsController. + web_contents2->SetVisibilityAndNotifyObservers(Visibility::HIDDEN); + EXPECT_FALSE(helper.IsUtteranceListEmpty()); + + // Finish |utterance1|, which should skip |utterance2| because |web_contents2| + // is hidden. + helper.controller()->OnTtsEvent(utterance1_id, TTS_EVENT_END, 0, 0, {}); + EXPECT_EQ(nullptr, helper.TtsControllerCurrentUtterance()); + EXPECT_TRUE(helper.IsUtteranceListEmpty()); +} } // namespace content diff --git a/chromium/content/browser/speech/tts_linux.cc b/chromium/content/browser/speech/tts_linux.cc index 2c6d2d03133..afd36df9ed3 100644 --- a/chromium/content/browser/speech/tts_linux.cc +++ b/chromium/content/browser/speech/tts_linux.cc @@ -14,7 +14,6 @@ #include "base/macros.h" #include "base/memory/singleton.h" #include "base/synchronization/lock.h" -#include "base/task/post_task.h" #include "base/task/thread_pool.h" #include "content/browser/speech/tts_platform_impl.h" #include "content/public/browser/browser_task_traits.h" @@ -128,7 +127,7 @@ void TtsPlatformImplLinux::Initialize() { // spd_open has memory leaks which are hard to suppress. // http://crbug.com/317360 ANNOTATE_SCOPED_MEMORY_LEAK; - conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", NULL, + conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr, SPD_MODE_THREADED); } if (!conn_) @@ -151,7 +150,7 @@ TtsPlatformImplLinux::~TtsPlatformImplLinux() { base::AutoLock lock(initialization_lock_); if (conn_) { libspeechd_loader_.spd_close(conn_); - conn_ = NULL; + conn_ = nullptr; } } @@ -159,14 +158,14 @@ void TtsPlatformImplLinux::Reset() { base::AutoLock lock(initialization_lock_); if (conn_) libspeechd_loader_.spd_close(conn_); - conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", NULL, + conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr, SPD_MODE_THREADED); } bool TtsPlatformImplLinux::PlatformImplAvailable() { if (!initialization_lock_.Try()) return false; - bool result = libspeechd_loader_.loaded() && (conn_ != NULL); + bool result = libspeechd_loader_.loaded() && (conn_ != nullptr); initialization_lock_.Release(); return result; } @@ -345,8 +344,8 @@ void TtsPlatformImplLinux::NotificationCallback(size_t msg_id, // be in a separate thread. if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) { current_notification_ = type; - base::PostTask( - FROM_HERE, {BrowserThread::UI}, + GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&TtsPlatformImplLinux::OnSpeechEvent, base::Unretained(TtsPlatformImplLinux::GetInstance()), type)); @@ -365,8 +364,8 @@ void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id, // be in a separate thread. if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) { current_notification_ = state; - base::PostTask( - FROM_HERE, {BrowserThread::UI}, + GetUIThreadTaskRunner({})->PostTask( + FROM_HERE, base::BindOnce(&TtsPlatformImplLinux::OnSpeechEvent, base::Unretained(TtsPlatformImplLinux::GetInstance()), state)); diff --git a/chromium/content/browser/speech/tts_utterance_impl.cc b/chromium/content/browser/speech/tts_utterance_impl.cc index 34ff42b55c5..aa6759c3973 100644 --- a/chromium/content/browser/speech/tts_utterance_impl.cc +++ b/chromium/content/browser/speech/tts_utterance_impl.cc @@ -3,6 +3,7 @@ // found in the LICENSE file. #include "content/browser/speech/tts_utterance_impl.h" + #include "base/values.h" #include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h" @@ -37,11 +38,14 @@ int TtsUtteranceImpl::next_utterance_id_ = 0; std::unique_ptr<TtsUtterance> TtsUtterance::Create( BrowserContext* browser_context) { - return std::make_unique<TtsUtteranceImpl>(browser_context); + return std::make_unique<TtsUtteranceImpl>(browser_context, nullptr); } -TtsUtteranceImpl::TtsUtteranceImpl(BrowserContext* browser_context) - : browser_context_(browser_context), +TtsUtteranceImpl::TtsUtteranceImpl(BrowserContext* browser_context, + WebContents* web_contents) + : WebContentsObserver(web_contents), + browser_context_(browser_context), + was_created_with_web_contents_(web_contents != nullptr), id_(next_utterance_id_++), src_id_(-1), can_enqueue_(false), diff --git a/chromium/content/browser/speech/tts_utterance_impl.h b/chromium/content/browser/speech/tts_utterance_impl.h index fc73f7c7570..2b54961acbd 100644 --- a/chromium/content/browser/speech/tts_utterance_impl.h +++ b/chromium/content/browser/speech/tts_utterance_impl.h @@ -5,22 +5,32 @@ #ifndef CONTENT_BROWSER_SPEECH_TTS_UTTERANCE_IMPL_H_ #define CONTENT_BROWSER_SPEECH_TTS_UTTERANCE_IMPL_H_ +#include <memory> #include <set> #include <string> -#include "base/values.h" -#include "content/public/browser/tts_controller.h" #include "content/public/browser/tts_utterance.h" +#include "content/public/browser/web_contents_observer.h" + +namespace base { +class Value; +} namespace content { class BrowserContext; +class WebContents; // Implementation of TtsUtterance. -class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance { +class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance, + public WebContentsObserver { public: - TtsUtteranceImpl(BrowserContext* browser_context); + TtsUtteranceImpl(BrowserContext* browser_context, WebContents* web_contents); ~TtsUtteranceImpl() override; + bool was_created_with_web_contents() const { + return was_created_with_web_contents_; + } + // TtsUtterance overrides. void OnTtsEvent(TtsEventType event_type, int char_index, @@ -77,6 +87,9 @@ class CONTENT_EXPORT TtsUtteranceImpl : public TtsUtterance { // The BrowserContext that initiated this utterance. BrowserContext* browser_context_; + // True if the constructor was supplied with a WebContents. + const bool was_created_with_web_contents_; + // The content embedder engine ID of the engine providing TTS for this // utterance, or empty if native TTS is being used. std::string engine_id_; |