// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// NOTE(review): the system header names below (<string>, <vector>,
// <Cocoa/Cocoa.h>) were missing from the text this file was recovered from.
// They are restored from the std::string / std::vector / NSSpeechSynthesizer
// usage in this file -- confirm against the canonical copy.
#include <string>
#include <vector>

#include "base/bind.h"
#include "base/mac/scoped_nsobject.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/strings/sys_string_conversions.h"
#include "base/values.h"
#include "content/browser/speech/tts_platform_impl.h"
#include "content/public/browser/tts_controller.h"

#import <Cocoa/Cocoa.h>

class TtsPlatformImplMac;

// Delegate object installed on each speech synthesizer. Its callbacks
// (finished speaking, will speak word, error) are forwarded to the
// TtsPlatformImplMac passed to the initializer.
@interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
 @private
  TtsPlatformImplMac* _ttsImplMac;  // weak.
}

// Stores |ttsImplMac| without taking ownership of it.
- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;

@end

// Subclass of NSSpeechSynthesizer that takes an utterance
// string on initialization, retains it and only allows it
// to be spoken once.
//
// We construct a new NSSpeechSynthesizer for each utterance, for
// two reasons:
// 1. To associate delegate callbacks with a particular utterance,
//    without assuming anything undocumented about the protocol.
// 2. To work around http://openradar.appspot.com/radar?id=2854403,
//    where Nuance voices don't retain the utterance string and
//    crash when trying to call willSpeakWord.
@interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
 @private
  // The string to speak; retained for the lifetime of the synthesizer.
  base::scoped_nsobject<NSString> _utterance;
  // Set once startSpeakingRetainedUtterance has been called.
  bool _didSpeak;
}

// Retains |utterance| so it can later be spoken exactly once.
- (id)initWithUtterance:(NSString*)utterance;

// Starts speaking the retained utterance and returns the result of
// NSSpeechSynthesizer's startSpeakingString:. CHECK-fails if called twice.
- (bool)startSpeakingRetainedUtterance;

// Overridden to CHECK-fail: callers must use startSpeakingRetainedUtterance.
- (bool)startSpeakingString:(NSString*)utterance;

@end

namespace {

// Value held by |utterance_id_| until the first utterance is processed.
constexpr int kInvalidUtteranceId = -1;

}  // namespace

// TtsPlatformImpl implementation backed by NSSpeechSynthesizer. A fresh
// SingleUseSpeechSynthesizer is created for every utterance so that delegate
// callbacks can be matched to the utterance that produced them.
class TtsPlatformImplMac : public content::TtsPlatformImpl {
 public:
  TtsPlatformImplMac(const TtsPlatformImplMac&) = delete;
  TtsPlatformImplMac& operator=(const TtsPlatformImplMac&) = delete;

  bool PlatformImplSupported() override { return true; }
  bool PlatformImplInitialized() override { return true; }

  // Strips SSML from |utterance| via the TtsController, then hands the result
  // to ProcessSpeech(). |on_speak_finished| is run with true if speech
  // started and false otherwise.
  void Speak(int utterance_id,
             const std::string& utterance,
             const std::string& lang,
             const content::VoiceData& voice,
             const content::UtteranceContinuousParameters& params,
             base::OnceCallback<void(bool)> on_speak_finished) override;

  // Stops and discards the current synthesizer, if any. Always returns true.
  bool StopSpeaking() override;

  // Pauses the current utterance immediately and reports TTS_EVENT_PAUSE.
  // No-op if nothing is being spoken or speech is already paused.
  void Pause() override;

  // Continues a paused utterance and reports TTS_EVENT_RESUME.
  // No-op if nothing is being spoken or speech is not paused.
  void Resume() override;

  // Returns whether the current synthesizer reports that it is speaking.
  bool IsSpeaking() override;

  // Appends one VoiceData per installed voice to |out_voices|, with the
  // system default voice first.
  void GetVoices(std::vector<content::VoiceData>* out_voices) override;

  // Called by ChromeTtsDelegate when we get a callback from the
  // native speech engine.
  void OnSpeechEvent(NSSpeechSynthesizer* sender,
                     content::TtsEventType event_type,
                     int char_index,
                     int char_length,
                     const std::string& error_message);

  // Get the single instance of this class.
  static TtsPlatformImplMac* GetInstance();

 private:
  friend base::NoDestructor<TtsPlatformImplMac>;
  TtsPlatformImplMac();

  // Second half of Speak(): configures a new synthesizer for
  // |parsed_utterance| (the utterance with SSML removed) and starts speaking.
  void ProcessSpeech(int utterance_id,
                     const std::string& lang,
                     const content::VoiceData& voice,
                     const content::UtteranceContinuousParameters& params,
                     base::OnceCallback<void(bool)> on_speak_finished,
                     const std::string& parsed_utterance);

  // Synthesizer for the utterance currently being spoken; nil when idle.
  base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  // Delegate shared by every synthesizer this object creates.
  base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  // Id of the most recently started utterance.
  int utterance_id_ = kInvalidUtteranceId;
  // Text of the most recently started utterance (SSML already stripped).
  std::string utterance_;
  // Character index carried by the last event forwarded to the controller.
  int last_char_index_ = 0;
  bool paused_ = false;
};

// static
content::TtsPlatformImpl* content::TtsPlatformImpl::GetInstance() {
  return TtsPlatformImplMac::GetInstance();
}

void TtsPlatformImplMac::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const content::VoiceData& voice,
    const content::UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished) {
  // Parse SSML and process speech.
  content::TtsController::GetInstance()->StripSSML(
      utterance,
      base::BindOnce(&TtsPlatformImplMac::ProcessSpeech,
                     base::Unretained(this), utterance_id, lang, voice, params,
                     std::move(on_speak_finished)));
}

void TtsPlatformImplMac::ProcessSpeech(
    int utterance_id,
    const std::string& lang,
    const content::VoiceData& voice,
    const content::UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished,
    const std::string& parsed_utterance) {
  utterance_ = parsed_utterance;
  paused_ = false;

  // stringWithUTF8String: yields nil when the bytes cannot be converted;
  // report failure instead of handing nil to the synthesizer.
  NSString* utterance_nsstring =
      [NSString stringWithUTF8String:utterance_.c_str()];
  if (!utterance_nsstring) {
    std::move(on_speak_finished).Run(false);
    return;
  }

  // Deliberately construct a new speech synthesizer every time Speak is
  // called, otherwise there's no way to know whether calls to the delegate
  // apply to the current utterance or a previous utterance. In
  // experimentation, the overhead of constructing and destructing a
  // NSSpeechSynthesizer is minimal.
  speech_synthesizer_.reset([[SingleUseSpeechSynthesizer alloc]
      initWithUtterance:utterance_nsstring]);
  [speech_synthesizer_ setDelegate:delegate_];

  if (!voice.native_voice_identifier.empty()) {
    NSString* native_voice_identifier =
        [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
    [speech_synthesizer_ setVoice:native_voice_identifier];
  }

  utterance_id_ = utterance_id;

  // TODO: support languages other than the default: crbug.com/88059

  if (params.rate >= 0.0) {
    // The TTS api defines rate via words per minute. Let 200 be the default.
    const int words_per_minute = static_cast<int>(params.rate * 200);
    [speech_synthesizer_ setObject:[NSNumber numberWithInt:words_per_minute]
                       forProperty:NSSpeechRateProperty
                             error:nil];
  }

  if (params.pitch >= 0.0) {
    // The input is a float from 0.0 to 2.0, with 1.0 being the default.
    // Get the default pitch for this voice and modulate it by 50% - 150%.
    // The error object is never inspected; it is initialized so the
    // synthesizer is not handed an indeterminate pointer.
    NSError* error = nil;
    NSNumber* defaultPitchObj =
        [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
                                         error:&error];
    // Fall back to 48 when the voice does not report a base pitch.
    int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
    int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
    [speech_synthesizer_ setObject:[NSNumber numberWithInt:newPitch]
                       forProperty:NSSpeechPitchBaseProperty
                             error:nil];
  }

  if (params.volume >= 0.0) {
    [speech_synthesizer_ setObject:[NSNumber numberWithFloat:params.volume]
                       forProperty:NSSpeechVolumeProperty
                             error:nil];
  }

  bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
  if (success) {
    content::TtsController* controller = content::TtsController::GetInstance();
    controller->OnTtsEvent(utterance_id_, content::TTS_EVENT_START, 0, -1, "");
  }
  std::move(on_speak_finished).Run(success);
}

bool TtsPlatformImplMac::StopSpeaking() {
  if (speech_synthesizer_.get()) {
    [speech_synthesizer_ stopSpeaking];
    // Dropping the synthesizer makes OnSpeechEvent() ignore any late
    // callbacks it may still deliver.
    speech_synthesizer_.reset(nil);
  }
  paused_ = false;
  return true;
}

void TtsPlatformImplMac::Pause() {
  // Compare against the sentinel explicitly: a plain truthiness test would
  // reject an utterance whose id is 0 and accept the -1 sentinel.
  if (speech_synthesizer_.get() && utterance_id_ != kInvalidUtteranceId &&
      !paused_) {
    [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
    paused_ = true;
    content::TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, content::TTS_EVENT_PAUSE, last_char_index_, -1, "");
  }
}

void TtsPlatformImplMac::Resume() {
  // See Pause() for why the id is compared against the sentinel.
  if (speech_synthesizer_.get() && utterance_id_ != kInvalidUtteranceId &&
      paused_) {
    [speech_synthesizer_ continueSpeaking];
    paused_ = false;
    content::TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, content::TTS_EVENT_RESUME, last_char_index_, -1, "");
  }
}

bool TtsPlatformImplMac::IsSpeaking() {
  if (speech_synthesizer_)
    return [speech_synthesizer_ isSpeaking];
  return false;
}

void TtsPlatformImplMac::GetVoices(
    std::vector<content::VoiceData>* out_voices) {
  NSArray* voices = [NSSpeechSynthesizer availableVoices];

  // Create a new temporary array of the available voices with
  // the default voice first.
  NSMutableArray* orderedVoices =
      [NSMutableArray arrayWithCapacity:[voices count]];
  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
  if (defaultVoice) {
    [orderedVoices addObject:defaultVoice];
  }
  for (NSString* voiceIdentifier in voices) {
    if (![voiceIdentifier isEqualToString:defaultVoice])
      [orderedVoices addObject:voiceIdentifier];
  }

  for (NSString* voiceIdentifier in orderedVoices) {
    out_voices->push_back(content::VoiceData());
    content::VoiceData& data = out_voices->back();

    NSDictionary* attributes =
        [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
    NSString* name = [attributes objectForKey:NSVoiceName];
    NSString* localeIdentifier =
        [attributes objectForKey:NSVoiceLocaleIdentifier];

    data.native = true;
    data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
    data.name = base::SysNSStringToUTF8(name);

    // Build a "language-COUNTRY" tag when both parts are present, otherwise
    // use the language code alone.
    NSDictionary* localeComponents =
        [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
    NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
    NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
    if (language && country) {
      data.lang = base::SysNSStringToUTF8(
          [NSString stringWithFormat:@"%@-%@", language, country]);
    } else {
      data.lang = base::SysNSStringToUTF8(language);
    }

    data.events.insert(content::TTS_EVENT_START);
    data.events.insert(content::TTS_EVENT_END);
    data.events.insert(content::TTS_EVENT_WORD);
    data.events.insert(content::TTS_EVENT_ERROR);
    data.events.insert(content::TTS_EVENT_CANCELLED);
    data.events.insert(content::TTS_EVENT_INTERRUPTED);
    data.events.insert(content::TTS_EVENT_PAUSE);
    data.events.insert(content::TTS_EVENT_RESUME);
  }
}

void TtsPlatformImplMac::OnSpeechEvent(NSSpeechSynthesizer* sender,
                                       content::TtsEventType event_type,
                                       int char_index,
                                       int char_length,
                                       const std::string& error_message) {
  // Don't send events from an utterance that's already completed.
  // This depends on the fact that we construct a new NSSpeechSynthesizer
  // each time we call Speak.
  if (sender != speech_synthesizer_.get())
    return;

  // An END event always reports the full length of the utterance.
  if (event_type == content::TTS_EVENT_END)
    char_index = static_cast<int>(utterance_.size());

  content::TtsController::GetInstance()->OnTtsEvent(
      utterance_id_, event_type, char_index, char_length, error_message);
  last_char_index_ = char_index;
}

TtsPlatformImplMac::TtsPlatformImplMac() {
  delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
}

// static
TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
  static base::NoDestructor<TtsPlatformImplMac> tts_platform;
  return tts_platform.get();
}

@implementation ChromeTtsDelegate

- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
  if ((self = [super init])) {
    _ttsImplMac = ttsImplMac;
  }
  return self;
}

// Reports TTS_EVENT_END; |finished_speaking| is not consulted.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
        didFinishSpeaking:(BOOL)finished_speaking {
  _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_END, 0, -1, "");
}

// Reports TTS_EVENT_WORD with the location and length of |word_range|.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
            willSpeakWord:(NSRange)word_range
                 ofString:(NSString*)string {
  // Ignore bogus word_range. The Mac speech synthesizer is a bit
  // buggy and occasionally returns a number way out of range.
  if (word_range.location > [string length])
    return;

  _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_WORD,
                             word_range.location, word_range.length, "");
}

// Reports TTS_EVENT_ERROR with |message| converted to UTF-8.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    didEncounterErrorAtIndex:(NSUInteger)character_index
                    ofString:(NSString*)string
                     message:(NSString*)message {
  // Ignore bogus character_index. The Mac speech synthesizer is a bit
  // buggy and occasionally returns a number way out of range.
  if (character_index > [string length])
    return;

  std::string message_utf8 = base::SysNSStringToUTF8(message);
  _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_ERROR, character_index,
                             -1, message_utf8);
}

@end

@implementation SingleUseSpeechSynthesizer

- (id)initWithUtterance:(NSString*)utterance {
  self = [super init];
  if (self) {
    // scoped_nsobject::reset() takes ownership, hence the explicit retain.
    _utterance.reset([utterance retain]);
    _didSpeak = false;
  }
  return self;
}

- (bool)startSpeakingRetainedUtterance {
  CHECK(!_didSpeak);
  CHECK(_utterance);
  _didSpeak = true;
  // Call super directly: this class's own startSpeakingString: always fails.
  return [super startSpeakingString:_utterance];
}

- (bool)startSpeakingString:(NSString*)utterance {
  CHECK(false);
  return false;
}

@end