// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <string>

#include "base/bind.h"
#include "base/mac/scoped_nsobject.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/strings/sys_string_conversions.h"
#include "base/values.h"
#include "content/browser/speech/tts_platform_impl.h"
#include "content/public/browser/tts_controller.h"

#import <Cocoa/Cocoa.h>

class TtsPlatformImplMac;

@interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
 @private
  TtsPlatformImplMac* _ttsImplMac;  // weak.
}

- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;

@end

// Subclass of NSSpeechSynthesizer that takes an utterance
// string on initialization, retains it and only allows it
// to be spoken once.
//
// We construct a new NSSpeechSynthesizer for each utterance, for
// two reasons:
// 1. To associate delegate callbacks with a particular utterance,
//    without assuming anything undocumented about the protocol.
// 2. To work around http://openradar.appspot.com/radar?id=2854403,
//    where Nuance voices don't retain the utterance string and
//    crash when trying to call willSpeakWord.
@interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
 @private
  base::scoped_nsobject<NSString> _utterance;
  bool _didSpeak;
}

- (id)initWithUtterance:(NSString*)utterance;
- (bool)startSpeakingRetainedUtterance;
- (bool)startSpeakingString:(NSString*)utterance;

@end

class TtsPlatformImplMac : public content::TtsPlatformImpl {
 public:
  TtsPlatformImplMac(const TtsPlatformImplMac&) = delete;
  TtsPlatformImplMac& operator=(const TtsPlatformImplMac&) = delete;

  bool PlatformImplSupported() override { return true; }
  bool PlatformImplInitialized() override { return true; }

  void Speak(int utterance_id,
             const std::string& utterance,
             const std::string& lang,
             const content::VoiceData& voice,
             const content::UtteranceContinuousParameters& params,
             base::OnceCallback<void(bool)> on_speak_finished) override;

  bool StopSpeaking() override;

  void Pause() override;

  void Resume() override;

  bool IsSpeaking() override;

  void GetVoices(std::vector<content::VoiceData>* out_voices) override;

  // Called by ChromeTtsDelegate when we get a callback from the
  // native speech engine.
  void OnSpeechEvent(NSSpeechSynthesizer* sender,
                     content::TtsEventType event_type,
                     int char_index,
                     int char_length,
                     const std::string& error_message);

  // Get the single instance of this class.
  static TtsPlatformImplMac* GetInstance();

 private:
  friend base::NoDestructor<TtsPlatformImplMac>;
  TtsPlatformImplMac();

  void ProcessSpeech(int utterance_id,
                     const std::string& lang,
                     const content::VoiceData& voice,
                     const content::UtteranceContinuousParameters& params,
                     base::OnceCallback<void(bool)> on_speak_finished,
                     const std::string& parsed_utterance);

  base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  int utterance_id_ = -1;
  std::string utterance_;
  int last_char_index_ = 0;
  bool paused_ = false;
};

// static
content::TtsPlatformImpl* content::TtsPlatformImpl::GetInstance() {
  return TtsPlatformImplMac::GetInstance();
}

void TtsPlatformImplMac::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const content::VoiceData& voice,
    const content::UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished) {
  // Parse SSML and process speech.
  content::TtsController::GetInstance()->StripSSML(
      utterance, base::BindOnce(&TtsPlatformImplMac::ProcessSpeech,
                                base::Unretained(this), utterance_id, lang,
                                voice, params, std::move(on_speak_finished)));
}

void TtsPlatformImplMac::ProcessSpeech(
    int utterance_id,
    const std::string& lang,
    const content::VoiceData& voice,
    const content::UtteranceContinuousParameters& params,
    base::OnceCallback<void(bool)> on_speak_finished,
    const std::string& parsed_utterance) {
  utterance_ = parsed_utterance;
  paused_ = false;

  NSString* utterance_nsstring =
      [NSString stringWithUTF8String:utterance_.c_str()];
  if (!utterance_nsstring) {
    std::move(on_speak_finished).Run(false);
    return;
  }

  // Deliberately construct a new speech synthesizer every time Speak is
  // called, otherwise there's no way to know whether calls to the delegate
  // apply to the current utterance or a previous utterance. In
  // experimentation, the overhead of constructing and destructing a
  // NSSpeechSynthesizer is minimal.
  speech_synthesizer_.reset([[SingleUseSpeechSynthesizer alloc]
      initWithUtterance:utterance_nsstring]);
  [speech_synthesizer_ setDelegate:delegate_];

  if (!voice.native_voice_identifier.empty()) {
    NSString* native_voice_identifier =
        [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
    [speech_synthesizer_ setVoice:native_voice_identifier];
  }

  utterance_id_ = utterance_id;

  // TODO: support languages other than the default: crbug.com/88059

  if (params.rate >= 0.0) {
    // The TTS api defines rate via words per minute. Let 200 be the default.
    [speech_synthesizer_ setObject:[NSNumber numberWithInt:params.rate * 200]
                       forProperty:NSSpeechRateProperty
                             error:nil];
  }

  if (params.pitch >= 0.0) {
    // The input is a float from 0.0 to 2.0, with 1.0 being the default.
    // Get the default pitch for this voice and modulate it by 50% - 150%.
    NSError* errorCode;
    NSNumber* defaultPitchObj =
        [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
                                         error:&errorCode];
    int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
    int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
    [speech_synthesizer_ setObject:[NSNumber numberWithInt:newPitch]
                       forProperty:NSSpeechPitchBaseProperty
                             error:nil];
  }

  if (params.volume >= 0.0) {
    [speech_synthesizer_ setObject:[NSNumber numberWithFloat:params.volume]
                       forProperty:NSSpeechVolumeProperty
                             error:nil];
  }

  bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
  if (success) {
    content::TtsController* controller = content::TtsController::GetInstance();
    controller->OnTtsEvent(utterance_id_, content::TTS_EVENT_START, 0, -1, "");
  }
  std::move(on_speak_finished).Run(success);
}

bool TtsPlatformImplMac::StopSpeaking() {
  if (speech_synthesizer_.get()) {
    [speech_synthesizer_ stopSpeaking];
    speech_synthesizer_.reset(nil);
  }
  paused_ = false;
  return true;
}

void TtsPlatformImplMac::Pause() {
  if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
    [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
    paused_ = true;
    content::TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, content::TTS_EVENT_PAUSE, last_char_index_, -1, "");
  }
}

void TtsPlatformImplMac::Resume() {
  if (speech_synthesizer_.get() && utterance_id_ && paused_) {
    [speech_synthesizer_ continueSpeaking];
    paused_ = false;
    content::TtsController::GetInstance()->OnTtsEvent(
        utterance_id_, content::TTS_EVENT_RESUME, last_char_index_, -1, "");
  }
}

bool TtsPlatformImplMac::IsSpeaking() {
  if (speech_synthesizer_)
    return [speech_synthesizer_ isSpeaking];
  return false;
}

void TtsPlatformImplMac::GetVoices(std::vector<content::VoiceData>* outVoices) {
  NSArray* voices = [NSSpeechSynthesizer availableVoices];

  // Create a new temporary array of the available voices with
  // the default voice first.
  NSMutableArray* orderedVoices =
      [NSMutableArray arrayWithCapacity:[voices count]];
  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
  if (defaultVoice) {
    [orderedVoices addObject:defaultVoice];
  }
  for (NSString* voiceIdentifier in voices) {
    if (![voiceIdentifier isEqualToString:defaultVoice])
      [orderedVoices addObject:voiceIdentifier];
  }

  for (NSString* voiceIdentifier in orderedVoices) {
    outVoices->push_back(content::VoiceData());
    content::VoiceData& data = outVoices->back();

    NSDictionary* attributes =
        [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
    NSString* name = [attributes objectForKey:NSVoiceName];
    NSString* localeIdentifier =
        [attributes objectForKey:NSVoiceLocaleIdentifier];

    data.native = true;
    data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
    data.name = base::SysNSStringToUTF8(name);

    NSDictionary* localeComponents =
        [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
    NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
    NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
    if (language && country) {
      data.lang = base::SysNSStringToUTF8(
          [NSString stringWithFormat:@"%@-%@", language, country]);
    } else {
      data.lang = base::SysNSStringToUTF8(language);
    }
    data.events.insert(content::TTS_EVENT_START);
    data.events.insert(content::TTS_EVENT_END);
    data.events.insert(content::TTS_EVENT_WORD);
    data.events.insert(content::TTS_EVENT_ERROR);
    data.events.insert(content::TTS_EVENT_CANCELLED);
    data.events.insert(content::TTS_EVENT_INTERRUPTED);
    data.events.insert(content::TTS_EVENT_PAUSE);
    data.events.insert(content::TTS_EVENT_RESUME);
  }
}

void TtsPlatformImplMac::OnSpeechEvent(NSSpeechSynthesizer* sender,
                                       content::TtsEventType event_type,
                                       int char_index,
                                       int char_length,
                                       const std::string& error_message) {
  // Don't send events from an utterance that's already completed.
  // This depends on the fact that we construct a new NSSpeechSynthesizer
  // each time we call Speak.
  if (sender != speech_synthesizer_.get())
    return;

  if (event_type == content::TTS_EVENT_END)
    char_index = utterance_.size();

  content::TtsController::GetInstance()->OnTtsEvent(
      utterance_id_, event_type, char_index, char_length, error_message);
  last_char_index_ = char_index;
}

TtsPlatformImplMac::TtsPlatformImplMac() {
  delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
}

// static
TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
  static base::NoDestructor<TtsPlatformImplMac> tts_platform;
  return tts_platform.get();
}

@implementation ChromeTtsDelegate

- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
  if ((self = [super init])) {
    _ttsImplMac = ttsImplMac;
  }
  return self;
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
        didFinishSpeaking:(BOOL)finished_speaking {
  _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_END, 0, -1, "");
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
            willSpeakWord:(NSRange)word_range
                 ofString:(NSString*)string {
  // Ignore bogus word_range. The Mac speech synthesizer is a bit
  // buggy and occasionally returns a number way out of range.
  if (word_range.location > [string length])
    return;

  _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_WORD,
                             word_range.location, word_range.length, "");
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
    didEncounterErrorAtIndex:(NSUInteger)character_index
                    ofString:(NSString*)string
                     message:(NSString*)message {
  // Ignore bogus character_index. The Mac speech synthesizer is a bit
  // buggy and occasionally returns a number way out of range.
  if (character_index > [string length])
    return;

  std::string message_utf8 = base::SysNSStringToUTF8(message);
  _ttsImplMac->OnSpeechEvent(sender, content::TTS_EVENT_ERROR, character_index,
                             -1, message_utf8);
}

@end

@implementation SingleUseSpeechSynthesizer

- (id)initWithUtterance:(NSString*)utterance {
  self = [super init];
  if (self) {
    _utterance.reset([utterance retain]);
    _didSpeak = false;
  }
  return self;
}

- (bool)startSpeakingRetainedUtterance {
  CHECK(!_didSpeak);
  CHECK(_utterance);
  _didSpeak = true;
  return [super startSpeakingString:_utterance];
}

- (bool)startSpeakingString:(NSString*)utterance {
  CHECK(false);
  return false;
}

@end