// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/speech_recognition_dispatcher.h"

#include <stddef.h>
#include <stdint.h>
#include <utility>

#include "base/strings/utf_string_conversions.h"
#include "content/common/speech_recognition_messages.h"
#include "content/renderer/render_view_impl.h"
#include "media/media_features.h"
#include "third_party/WebKit/public/platform/WebString.h"
#include "third_party/WebKit/public/platform/WebVector.h"
#include "third_party/WebKit/public/web/WebSpeechGrammar.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionParams.h"
#include "third_party/WebKit/public/web/WebSpeechRecognitionResult.h"
#include "third_party/WebKit/public/web/WebSpeechRecognizerClient.h"

#if BUILDFLAG(ENABLE_WEBRTC)
#include "content/renderer/media/speech_recognition_audio_sink.h"
#endif

using blink::WebVector;
using blink::WebString;
using blink::WebSpeechGrammar;
using blink::WebSpeechRecognitionHandle;
using blink::WebSpeechRecognitionResult;
using blink::WebSpeechRecognitionParams;
using blink::WebSpeechRecognizerClient;

namespace content {

SpeechRecognitionDispatcher::SpeechRecognitionDispatcher(
    RenderViewImpl* render_view)
    : RenderViewObserver(render_view),
      recognizer_client_(NULL),
      next_id_(1) {}

SpeechRecognitionDispatcher::~SpeechRecognitionDispatcher() {}

void SpeechRecognitionDispatcher::AbortAllRecognitions() {
  ResetAudioSink();
  Send(new SpeechRecognitionHostMsg_AbortAllRequests(routing_id()));
}

bool SpeechRecognitionDispatcher::OnMessageReceived(
    const IPC::Message& message) {
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(SpeechRecognitionDispatcher, message)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Started, OnRecognitionStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioStarted, OnAudioStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundStarted, OnSoundStarted)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_SoundEnded, OnSoundEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioEnded, OnAudioEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ErrorOccurred, OnErrorOccurred)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_Ended, OnRecognitionEnded)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_ResultRetrieved,
                        OnResultsRetrieved)
    IPC_MESSAGE_HANDLER(SpeechRecognitionMsg_AudioReceiverReady,
                        OnAudioReceiverReady)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}

void SpeechRecognitionDispatcher::OnDestruct() {
  delete this;
}
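// Lifecycle of a recognition session, as reflected in the message map above
// (a sketch of the expected ordering, inferred from this file rather than
// from the browser-side implementation):
//   1. Start() sends SpeechRecognitionHostMsg_StartRequest and maps the
//      Blink handle to a request ID.
//   2. The browser replies with SpeechRecognitionMsg_Started, then
//      SpeechRecognitionMsg_AudioStarted / SoundStarted as capture begins.
//   3. Zero or more SpeechRecognitionMsg_ResultRetrieved messages deliver
//      provisional and final results.
//   4. SpeechRecognitionMsg_Ended closes the session; OnRecognitionEnded()
//      removes the handle mapping.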
void SpeechRecognitionDispatcher::Start(
    const WebSpeechRecognitionHandle& handle,
    const WebSpeechRecognitionParams& params,
    WebSpeechRecognizerClient* recognizer_client) {
  DCHECK(!recognizer_client_ || recognizer_client_ == recognizer_client);
  recognizer_client_ = recognizer_client;

#if BUILDFLAG(ENABLE_WEBRTC)
  const blink::WebMediaStreamTrack track = params.AudioTrack();
  if (!track.IsNull()) {
    // Check if this type of track is allowed by implemented policy.
    if (SpeechRecognitionAudioSink::IsSupportedTrack(track)) {
      audio_track_.Assign(track);
    } else {
      audio_track_.Reset();
      // Notify the user that the track used is not supported.
      recognizer_client_->DidReceiveError(
          handle, WebString("Provided audioTrack is not supported."),
          WebSpeechRecognizerClient::kAudioCaptureError);
      return;
    }
  }

  // Destroy any previous instance to detach from the audio track.
  // Each new session should reinstantiate the provider once the track is
  // ready.
  ResetAudioSink();
#endif

  SpeechRecognitionHostMsg_StartRequest_Params msg_params;
  for (size_t i = 0; i < params.Grammars().size(); ++i) {
    const WebSpeechGrammar& grammar = params.Grammars()[i];
    msg_params.grammars.push_back(SpeechRecognitionGrammar(
        grammar.Src().GetString().Utf8(), grammar.Weight()));
  }
  msg_params.language = params.Language().Utf8();
  msg_params.max_hypotheses = static_cast<uint32_t>(params.MaxAlternatives());
  msg_params.continuous = params.Continuous();
  msg_params.interim_results = params.InterimResults();
  msg_params.origin_url = params.Origin().ToString().Utf8();
  msg_params.render_view_id = routing_id();
  msg_params.request_id = GetOrCreateIDForHandle(handle);
#if BUILDFLAG(ENABLE_WEBRTC)
  // Fall back to default input when the track is not allowed.
  msg_params.using_audio_track = !audio_track_.IsNull();
#else
  msg_params.using_audio_track = false;
#endif
  // The handle mapping will be removed in |OnRecognitionEnded|.
  Send(new SpeechRecognitionHostMsg_StartRequest(msg_params));
}

void SpeechRecognitionDispatcher::Stop(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore a |stop| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_StopCaptureRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::Abort(
    const WebSpeechRecognitionHandle& handle,
    WebSpeechRecognizerClient* recognizer_client) {
  ResetAudioSink();
  // Ignore an |abort| issued without a matching |start|.
  if (recognizer_client_ != recognizer_client || !HandleExists(handle))
    return;
  Send(new SpeechRecognitionHostMsg_AbortRequest(
      routing_id(), GetOrCreateIDForHandle(handle)));
}

void SpeechRecognitionDispatcher::OnRecognitionStarted(int request_id) {
  recognizer_client_->DidStart(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioStarted(int request_id) {
  recognizer_client_->DidStartAudio(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundStarted(int request_id) {
  recognizer_client_->DidStartSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnSoundEnded(int request_id) {
  recognizer_client_->DidEndSound(GetHandleFromID(request_id));
}

void SpeechRecognitionDispatcher::OnAudioEnded(int request_id) {
  recognizer_client_->DidEndAudio(GetHandleFromID(request_id));
}

static WebSpeechRecognizerClient::ErrorCode WebKitErrorCode(
    SpeechRecognitionErrorCode e) {
  switch (e) {
    case SPEECH_RECOGNITION_ERROR_NONE:
      NOTREACHED();
      return WebSpeechRecognizerClient::kOtherError;
    case SPEECH_RECOGNITION_ERROR_NO_SPEECH:
      return WebSpeechRecognizerClient::kNoSpeechError;
    case SPEECH_RECOGNITION_ERROR_ABORTED:
      return WebSpeechRecognizerClient::kAbortedError;
    case SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE:
      return WebSpeechRecognizerClient::kAudioCaptureError;
    case SPEECH_RECOGNITION_ERROR_NETWORK:
      return WebSpeechRecognizerClient::kNetworkError;
    case SPEECH_RECOGNITION_ERROR_NOT_ALLOWED:
      return WebSpeechRecognizerClient::kNotAllowedError;
    case SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED:
      return WebSpeechRecognizerClient::kServiceNotAllowedError;
    case SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR:
      return WebSpeechRecognizerClient::kBadGrammarError;
    case SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED:
      return WebSpeechRecognizerClient::kLanguageNotSupportedError;
    case SPEECH_RECOGNITION_ERROR_NO_MATCH:
      NOTREACHED();
      return WebSpeechRecognizerClient::kOtherError;
  }
  NOTREACHED();
  return WebSpeechRecognizerClient::kOtherError;
}
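// Note: SPEECH_RECOGNITION_ERROR_NO_MATCH is NOTREACHED() in the mapping
// above because OnErrorOccurred() below reports it through
// DidReceiveNoMatch() rather than DidReceiveError().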
void SpeechRecognitionDispatcher::OnErrorOccurred(
    int request_id,
    const SpeechRecognitionError& error) {
  if (error.code == SPEECH_RECOGNITION_ERROR_NO_MATCH) {
    recognizer_client_->DidReceiveNoMatch(GetHandleFromID(request_id),
                                          WebSpeechRecognitionResult());
  } else {
    ResetAudioSink();
    recognizer_client_->DidReceiveError(
        GetHandleFromID(request_id),
        WebString(),  // TODO(primiano): message?
        WebKitErrorCode(error.code));
  }
}

void SpeechRecognitionDispatcher::OnRecognitionEnded(int request_id) {
  // TODO(tommi): It is possible that the handle isn't found in the map if
  // the user just refreshed the page. It seems that we then get a
  // notification for the previously loaded instance of the page.
  HandleMap::iterator iter = handle_map_.find(request_id);
  if (iter == handle_map_.end()) {
    DLOG(ERROR) << "OnRecognitionEnded called for a handle that doesn't exist";
  } else {
    WebSpeechRecognitionHandle handle = iter->second;
    // Note: we need to erase the handle from the map *before* calling didEnd.
    // didEnd may call back synchronously to start a new recognition session,
    // and we don't want to delete the handle from the map after that happens.
    handle_map_.erase(request_id);
    ResetAudioSink();
    recognizer_client_->DidEnd(handle);
  }
}

void SpeechRecognitionDispatcher::OnResultsRetrieved(
    int request_id,
    const SpeechRecognitionResults& results) {
  size_t provisional_count = 0;
  SpeechRecognitionResults::const_iterator it = results.begin();
  for (; it != results.end(); ++it) {
    if (it->is_provisional)
      ++provisional_count;
  }

  WebVector<WebSpeechRecognitionResult> provisional(provisional_count);
  WebVector<WebSpeechRecognitionResult> final(
      results.size() - provisional_count);

  int provisional_index = 0, final_index = 0;
  for (it = results.begin(); it != results.end(); ++it) {
    const SpeechRecognitionResult& result = (*it);
    WebSpeechRecognitionResult* webkit_result = result.is_provisional ?
        &provisional[provisional_index++] : &final[final_index++];

    const size_t num_hypotheses = result.hypotheses.size();
    WebVector<WebString> transcripts(num_hypotheses);
    WebVector<float> confidences(num_hypotheses);
    for (size_t i = 0; i < num_hypotheses; ++i) {
      transcripts[i] = WebString::FromUTF16(result.hypotheses[i].utterance);
      confidences[i] = static_cast<float>(result.hypotheses[i].confidence);
    }
    webkit_result->Assign(transcripts, confidences, !result.is_provisional);
  }

  recognizer_client_->DidReceiveResults(GetHandleFromID(request_id), final,
                                        provisional);
}

void SpeechRecognitionDispatcher::OnAudioReceiverReady(
    int request_id,
    const media::AudioParameters& params,
    const base::SharedMemoryHandle memory,
    const base::SyncSocket::TransitDescriptor descriptor) {
#if BUILDFLAG(ENABLE_WEBRTC)
  DCHECK(!speech_audio_sink_.get());
  if (audio_track_.IsNull()) {
    ResetAudioSink();
    return;
  }

  // The instantiation and type of the SyncSocket is up to the client since
  // it is dependency-injected into the SpeechRecognitionAudioSink.
  std::unique_ptr<base::SyncSocket> socket(new base::CancelableSyncSocket(
      base::SyncSocket::UnwrapHandle(descriptor)));

  speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
      audio_track_, params, memory, std::move(socket),
      base::Bind(&SpeechRecognitionDispatcher::ResetAudioSink,
                 base::Unretained(this))));
#endif
}

int SpeechRecognitionDispatcher::GetOrCreateIDForHandle(
    const WebSpeechRecognitionHandle& handle) {
  // Search first for an existing mapping.
  for (HandleMap::iterator iter = handle_map_.begin();
       iter != handle_map_.end(); ++iter) {
    if (iter->second.Equals(handle))
      return iter->first;
  }
  // If no existing mapping was found, create a new one.
  const int new_id = next_id_;
  handle_map_[new_id] = handle;
  ++next_id_;
  return new_id;
}
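// Note: GetOrCreateIDForHandle() above and HandleExists() below scan the map
// linearly, presumably because WebSpeechRecognitionHandle only exposes
// Equals() (no ordering or hashing to key a map on), and the number of
// concurrent sessions per view is expected to be small.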
bool SpeechRecognitionDispatcher::HandleExists(
    const WebSpeechRecognitionHandle& handle) {
  for (HandleMap::iterator iter = handle_map_.begin();
       iter != handle_map_.end(); ++iter) {
    if (iter->second.Equals(handle))
      return true;
  }
  return false;
}

void SpeechRecognitionDispatcher::ResetAudioSink() {
#if BUILDFLAG(ENABLE_WEBRTC)
  speech_audio_sink_.reset();
#endif
}

const WebSpeechRecognitionHandle& SpeechRecognitionDispatcher::GetHandleFromID(
    int request_id) {
  HandleMap::iterator iter = handle_map_.find(request_id);
  CHECK(iter != handle_map_.end());
  return iter->second;
}

}  // namespace content