/*************************************************************************** * Copyright (C) 2005 to 2007 by Jonathan Duddington * * email: jonsd@users.sourceforge.net * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write see: * * . * ***************************************************************************/ /* Command-line front end. The visible code parses options, locates the espeak-data directory, initialises the synthesizer and then either renders speech into WAV data (a file or stdout) or hands playback to a sound interface. NOTE(review): in this copy the header names of the angle-bracket #include directives and the bracketed placeholder words in help_text appear to have been lost; restore them from a pristine copy rather than by guesswork. */ #include "StdAfx.h" #include "speech.h" #include #include #include #ifndef PLATFORM_DOS #ifdef PLATFORM_WINDOWS #include #include #else #include #endif #endif #ifndef NEED_GETOPT #include #endif #include #include #include #include #include "speak_lib.h" #include "phoneme.h" #include "synthesize.h" #include "voice.h" #include "translate.h" extern void Write4Bytes(FILE *f, int value); char path_home[N_PATH_HOME]; // this is the espeak-data directory char filetype[5]; char wavefile[200]; int (* uri_callback)(int, const char *, const char *) = NULL; int (* phoneme_callback)(const char *) = NULL; /* Output state shared by OpenWaveFile(), CloseWaveFile(), MarkerEvent() and WavegenFile(). */ FILE *f_wave = NULL; int quiet = 0; unsigned int samples_total = 0; unsigned int samples_split = 0; unsigned int wavefile_count = 0; int end_of_sentence = 0; /* Usage text. NOTE(review): not referenced anywhere in the visible code; presumably printed by a help option handled in the part of main() that is missing from this copy - confirm. */ static const char *help_text = "\nspeak [options] [\"\"]\n\n" "-f Text file to speak\n" "--stdin Read text input from stdin instead of a file\n\n" "If neither -f nor --stdin, are spoken, or if none then text is\n" "spoken from stdin, each line separately.\n\n" "-a \n" "\t Amplitude, 0 to 200, default is 100\n" "-g \n" "\t Word gap. 
Pause between words, units of 10mS at the default speed\n" "-l \n" "\t Line length. If not zero (which is the default), consider\n" "\t lines less than this length as end-of-clause\n" "-p \n" "\t Pitch adjustment, 0 to 99, default is 50\n" "-s \n" "\t Speed in words per minute 80 to 390, default is 170\n" "-v \n" "\t Use voice file of this name from espeak-data/voices\n" "-w \n" "\t Write output to this WAV file, rather than speaking it directly\n" "-b\t Input text encoding, 1=UTF8, 2=8 bit, 4=16 bit \n" "-m\t Interpret SSML markup, and ignore other < > tags\n" "-q\t Quiet, don't produce any speech (may be useful with -x)\n" "-x\t Write phoneme mnemonics to stdout\n" "-X\t Write phonemes mnemonics and translation trace to stdout\n" "-z\t No final sentence pause at the end of the text\n" "--stdout Write speech output to stdout\n" "--compile=\n" "\t Compile the pronunciation rules and dictionary in the current\n" "\t directory. = is optional and specifies which language\n" "--path=\"\"\n" "\t Specifies the directory containing the espeak-data directory\n" "--phonout=\"\"\n" "\t Write output from -x -X commands and mbrola phoneme data to this file\n" "--punct=\"\"\n" "\t Speak the names of punctuation characters during speaking. If\n" "\t = is omitted, all punctuation is spoken.\n" "--split=\"\"\n" "\t Starts a new WAV file every . 
Used with -w\n" "--voices=\n" "\t List the available voices for the specified language.\n" "\t If is omitted, then list all voices.\n" "-k \n" "\t Indicate capital letters with: 1=sound, 2=the word \"capitals\",\n" "\t higher values = a pitch increase (try -k20).\n"; void DisplayVoices(FILE *f_out, char *language); USHORT voice_pcnt[N_PEAKS+1][3]; /* Returns the size of `filename` as reported by stat(): 0 if stat() fails, -2 if the path is a directory, otherwise st_size (narrowed to int by the return type). */ int GetFileLength(const char *filename) {//==================================== struct stat statbuf; if(stat(filename,&statbuf) != 0) return(0); if((statbuf.st_mode & S_IFMT) == S_IFDIR) // if(S_ISDIR(statbuf.st_mode)) return(-2); // a directory return(statbuf.st_size); } // end of GetFileLength /* malloc() wrapper: prints a message on stderr when the allocation fails and still returns the NULL pointer to the caller. */ char *Alloc(int size) {//================== char *p; if((p = (char *)malloc(size)) == NULL) fprintf(stderr,"Can't allocate memory\n"); return(p); } /* Releases memory with free(); a NULL pointer is ignored. */ void Free(void *ptr) {//================= if(ptr != NULL) free(ptr); } /* Prints a table of voices to f_out. A non-empty `language` is passed to espeak_ListVoices() as a filter; otherwise all voices are listed. Each v->languages entry is read as one priority byte followed by a NUL-terminated language name, and the list ends at a zero byte. The first entry is printed with the voice details, later entries in parentheses. `scores` is only used by the commented-out score column. */ void DisplayVoices(FILE *f_out, char *language) {//============================================ int ix; const char *p; int len; int count; int scores = 0; const espeak_VOICE *v; const char *lang_name; char age_buf[12]; const espeak_VOICE **voices; espeak_VOICE voice_select; static char genders[4] = {' ','M','F',' '}; if((language != NULL) && (language[0] != 0)) { // display only voices for the specified language, in order of priority voice_select.languages = language; voice_select.age = 0; voice_select.gender = 0; voice_select.name = NULL; voices = espeak_ListVoices(&voice_select); scores = 1; } else { voices = espeak_ListVoices(NULL); } fprintf(f_out,"Pty Language Age/Gender VoiceName File Other Langs\n"); for(ix=0; (v = voices[ix]) != NULL; ix++) { count = 0; p = v->languages; while(*p != 0) { len = strlen(p+1); lang_name = p+1; if(v->age == 0) strcpy(age_buf," "); else sprintf(age_buf,"%3d",v->age); if(count==0) { fprintf(f_out,"%2d %-12s%s%c %-17s %-11s ", p[0],lang_name,age_buf,genders[v->gender],v->name,v->identifier); } else { fprintf(f_out,"(%s %d)",lang_name,p[0]); } count++; p += len+2; } // if(scores) // 
fprintf(f_out,"%3d ",v->score); fputc('\n',f_out); } } // end of DisplayVoices /* Opens `path` for WAV output (the literal name "stdout" selects the stdout stream) and stores the stream in f_wave. The header is written as the first 24 bytes of wave_hdr, then `rate` and rate*2 through Write4Bytes(), then the 12 bytes starting at wave_hdr[32]. Returns 0 on success, 1 if the file could not be opened, 2 if path is NULL. */ static int OpenWaveFile(const char *path, int rate) //================================================= { // Set the length of 0x7ffff000 for --stdout // This will be changed to the correct length for -w (write to file) static unsigned char wave_hdr[44] = { 'R','I','F','F',0x24,0xf0,0xff,0x7f,'W','A','V','E','f','m','t',' ', 0x10,0,0,0,1,0,1,0, 9,0x3d,0,0,0x12,0x7a,0,0, 2,0,0x10,0,'d','a','t','a', 0x00,0xf0,0xff,0x7f}; if(path == NULL) return(2); if(strcmp(path,"stdout")==0) f_wave = stdout; else f_wave = fopen(path,"wb"); if(f_wave != NULL) { fwrite(wave_hdr,1,24,f_wave); Write4Bytes(f_wave,rate); Write4Bytes(f_wave,rate * 2); fwrite(&wave_hdr[32],1,12,f_wave); return(0); } return(1); } // end of OpenWaveFile /* Finishes the current WAV file: rewrites the two size fields at offsets 4 and 40 with (file position - 8) and (file position - 44), closes the stream and resets f_wave to NULL. Does nothing when f_wave is NULL or stdout. */ static void CloseWaveFile() //========================= { unsigned int pos; if((f_wave == NULL) || (f_wave == stdout)) return; fflush(f_wave); pos = ftell(f_wave); fseek(f_wave,4,SEEK_SET); Write4Bytes(f_wave,pos - 8); fseek(f_wave,40,SEEK_SET); Write4Bytes(f_wave,pos - 44); fclose(f_wave); f_wave = NULL; } // end of CloseWaveFile /* Marker callback. Only type 2 is acted on: it sets end_of_sentence, which WavegenFile() reads and clears. The other arguments are unused. NOTE(review): the existing "Do nothing" remark is therefore not quite accurate. */ void MarkerEvent(int type, unsigned int char_position, int value, unsigned char *out_ptr) {//====================================================================================== // Do nothing in the command-line version. 
if(type == 2) end_of_sentence = 1; } // end of MarkerEvent /* Generates one 512-byte buffer of audio with WavegenFill(0) and, unless `quiet` is set, appends it to f_wave. When f_wave is NULL a numbered file named from wavefile, wavefile_count and filetype is opened first. After an end-of-sentence marker the current file is closed once samples_total exceeds a non-zero samples_split. Returns the WavegenFill() result, or 1 if a file could not be opened. NOTE(review): fname is filled with an unbounded sprintf(); and the buffer generated in the call that closes a split file is not written anywhere, because f_wave is NULL by the time the write is reached. */ static int WavegenFile(void) {//========================= int finished; unsigned char wav_outbuf[512]; char fname[210]; out_ptr = out_start = wav_outbuf; out_end = wav_outbuf + sizeof(wav_outbuf); finished = WavegenFill(0); if(quiet) return(finished); if(f_wave == NULL) { sprintf(fname,"%s_%.2d%s",wavefile,++wavefile_count,filetype); if(OpenWaveFile(fname, samplerate) != 0) return(1); } if(end_of_sentence) { end_of_sentence = 0; if((samples_split > 0 ) && (samples_total > samples_split)) { CloseWaveFile(); samples_total = 0; } } if(f_wave != NULL) { samples_total += (out_ptr - wav_outbuf)/2; fwrite(wav_outbuf, 1, out_ptr - wav_outbuf, f_wave); } return(finished); } // end of WavegenFile /* Fills path_home with the location of the espeak-data directory. An explicit path_specified always wins. Otherwise: on Windows the directory of argv0 is tried first and then the "path" value of the eSpeak registry key; on DOS PATH_ESPEAK_DATA is used; on other platforms every lookup is inside #if 0, so path_home is left unchanged. NOTE(review): the sprintf()/strcpy() calls into path_home are unbounded, and the results of RegOpenKeyEx()/RegQueryValueEx() are not checked. */ static void init_path(char *argv0, char *path_specified) {//===================================================== if(path_specified) { sprintf(path_home,"%s/espeak-data",path_specified); return; } #ifdef PLATFORM_WINDOWS HKEY RegKey; unsigned long size; unsigned long var_type; char *p; char *env; unsigned char buf[sizeof(path_home)-12]; #if 0 if(((env = getenv("ESPEAK_DATA_PATH")) != NULL) && ((strlen(env)+12) < sizeof(path_home))) { sprintf(path_home,"%s\\espeak-data",env); if(GetFileLength(path_home) == -2) return; // an espeak-data directory exists in the directory specified by environment variable } #endif strcpy(path_home,argv0); if((p = strrchr(path_home,'\\')) != NULL) { strcpy(&p[1],"espeak-data"); if(GetFileLength(path_home) == -2) return; // an espeak-data directory exists in the same directory as the espeak program } // otherwise, look in the Windows Registry buf[0] = 0; RegOpenKeyEx(HKEY_LOCAL_MACHINE, "Software\\Microsoft\\Speech\\Voices\\Tokens\\eSpeak", 0, KEY_READ, &RegKey); size = sizeof(buf); var_type = REG_SZ; RegQueryValueEx(RegKey, "path", 0, &var_type, buf, &size); sprintf(path_home,"%s\\espeak-data",buf); #else #ifdef PLATFORM_DOS strcpy(path_home,PATH_ESPEAK_DATA); #else char *env; #if 0 if((env = 
getenv("ESPEAK_DATA_PATH")) != NULL) { snprintf(path_home,sizeof(path_home),"%s/espeak-data",env); if(GetFileLength(path_home) == -2) return; // an espeak-data directory exists } #endif #if 0 snprintf(path_home,sizeof(path_home),"%s/espeak-data",getenv("HOME")); if(access(path_home,R_OK) != 0) { strcpy(path_home,PATH_ESPEAK_DATA); } #endif #endif #endif } /* Start-up sequence: WavegenInit(22050,0), then LoadPhData() (exits the program on -1, prints a version warning for any other result than 1), LoadConfig(), SetVoiceStack(NULL) and SynthesizeInit(). The remainder of this function is not present in this copy. */ static int initialise(void) {//======================== int param; int result; // It seems that the wctype functions don't work until the locale has been set // to something other than the default "C". Then, not only Latin1 but also the // other characters give the correct results with iswalpha() etc. #ifdef PLATFORM_RISCOS setlocale(LC_CTYPE,"ISO8859-1"); #else #if 0 if(setlocale(LC_CTYPE,"en_US.UTF-8") == NULL) { if(setlocale(LC_CTYPE,"UTF-8") == NULL) setlocale(LC_CTYPE,""); } #endif #endif WavegenInit(22050,0); // 22050 if((result = LoadPhData()) != 1) { if(result == -1) { fprintf(stderr,"Failed to load espeak-data\n"); exit(1); } else fprintf(stderr,"Wrong version of espeak-data 0x%x (expects 0x%x) at %s\n",result,version_phdata,path_home); } LoadConfig(); SetVoiceStack(NULL); SynthesizeInit(); for(param=0; param /* NOTE(review): source text is missing at this point in this copy. The end of initialise() and the start of main() (its header, local declarations, the option-parsing loop header and the earlier switch cases) are absent; what follows resumes inside main()'s option switch. Recover the gap from a pristine copy instead of reconstructing it. */ 99) pitch_adjustment = 99; break; case 'q': quiet = 1; break; case 'f': strncpy0(filename,optarg2,sizeof(filename)); break; case 'l': value = 0; value = atoi(optarg2); option_linelength = value; break; case 'a': amp = atoi(optarg2); break; case 's': speed = atoi(optarg2); break; case 'g': wordgap = atoi(optarg2); break; case 'v': strncpy0(voicename,optarg2,sizeof(voicename)); break; case 'w': option_waveout = 1; strncpy0(wavefile,optarg2,sizeof(wavefile)); break; case 'z': option_endpause = 0; break; case 0x100: // --stdin flag_stdin = 1; break; case 0x105: // --stdout option_waveout = 1; strcpy(wavefile,"stdout"); break; case 0x101: // --compile-debug case 0x102: // --compile if(optarg2 != NULL) strncpy0(voicename,optarg2,sizeof(voicename)); flag_compile = c; break; case 0x103: // --punct 
option_punctuation = 1; if(optarg2 != NULL) { ix = 0; while((ix < N_PUNCTLIST) && ((option_punctlist[ix] = optarg2[ix]) != 0)) ix++; option_punctlist[N_PUNCTLIST-1] = 0; option_punctuation = 2; } break; case 0x104: // --voices init_path(argv[0],data_path); DisplayVoices(stdout,optarg2); exit(0); case 0x106: // -- split if(optarg2 == NULL) samples_split = 30; // default 30 minutes else samples_split = atoi(optarg2); break; case 0x107: // --path data_path = optarg2; break; case 0x108: // --phonout if((f_trans = fopen(optarg2,"w")) == NULL) { fprintf(stderr,"Can't write to: %s\n",optarg2); f_trans = stderr; } break; default: exit(0); } } /* Options are parsed: locate the data directory, initialise, and if a compile option was given build the dictionary and exit. The low bit of flag_compile (0x101 versus 0x102) is passed to CompileDictionary() as its last argument. */ init_path(argv[0],data_path); initialise(); if(flag_compile) { LoadVoice(voicename,5); #ifdef PLATFORM_DOS char path_dsource[sizeof(path_home)+20]; strcpy(path_dsource,path_home); path_dsource[strlen(path_home)-11] = 0; // remove "espeak-data" from the end strcat(path_dsource,"dictsource\\"); CompileDictionary(path_dsource,dictionary_name,NULL,NULL, flag_compile & 0x1); #else #ifdef PLATFORM_WINDOWS char path_dsource[sizeof(path_home)+20]; strcpy(path_dsource,path_home); path_dsource[strlen(path_home)-11] = 0; // remove "espeak-data" from the end strcat(path_dsource,"dictsource\\"); CompileDictionary(path_dsource,dictionary_name,NULL,NULL, flag_compile & 0x1); #else CompileDictionary(NULL,dictionary_name,NULL,NULL, flag_compile & 0x1); #endif #endif exit(0); } if(voicename[0] == 0) strcpy(voicename,"default"); if(SetVoiceByName(voicename) != EE_OK) { memset(&voice_select,0,sizeof(voice_select)); voice_select.languages = voicename; if(SetVoiceByProperties(&voice_select) != EE_OK) { fprintf(stderr,"%svoice '%s'\n",err_load,voicename); exit(2); } } SetParameter(espeakRATE,speed,0); SetParameter(espeakVOLUME,amp,0); SetParameter(espeakCAPITALS,option_capitals,0); SetParameter(espeakPUNCTUATION,option_punctuation,0); SetParameter(espeakWORDGAP,wordgap,0); if(pitch_adjustment != 50) { SetParameter(espeakPITCH,pitch_adjustment,0); } 
DoVoiceChange(voice); if(filename[0]==0) { if((optind < argc) && (flag_stdin == 0)) { // there's a non-option parameter, and no -f or --stdin // use it as text p_text = argv[optind]; } else { f_text = stdin; if(flag_stdin == 0) option_linelength = -1; // single input lines on stdin } } else { f_text = fopen(filename,"r"); } if((f_text == NULL) && (p_text == NULL)) { fprintf(stderr,"%sfile '%s'\n",err_load,filename); exit(1); } /* Output stage. With -w, --stdout or -q the clauses are rendered by calling WavegenFile() in a loop. samples_split is converted below from minutes to a sample count (samplerate * minutes * 60); a non-zero value defers opening the output file to WavegenFile() and splits a short extension off wavefile into filetype. NOTE(review): the "Can't write to output file" message ends with a stray ' after its newline. */ if(option_waveout || quiet) { if(quiet) { // no sound output OpenWaveFile(NULL,samplerate); option_waveout = 1; } else { // write sound output to a WAV file samples_split = (samplerate * samples_split) * 60; if(samples_split) { // don't open the wav file until we start generating speech char *extn; extn = strrchr(wavefile,'.'); if((extn != NULL) && ((wavefile + strlen(wavefile) - extn) <= 4)) { strcpy(filetype,extn); *extn = 0; } } else if(OpenWaveFile(wavefile,samplerate) != 0) { fprintf(stderr,"Can't write to output file '%s'\n'",wavefile); exit(3); } } InitText(0); SpeakNextClause(f_text,p_text,0); ix = 1; for(;;) { if(WavegenFile() != 0) { if(ix == 0) break; // finished, wavegen command queue is empty } if(Generate(phoneme_list,&n_phoneme_list,1)==0) { ix = SpeakNextClause(NULL,NULL,1); } } CloseWaveFile(); } else { // Silence on ^C or SIGINT // signal(SIGINT,StopSpeak); // output sound using portaudio WavegenInitSound(); InitText(0); SpeakNextClause(f_text,p_text,0); if(option_quiet) { while(SpeakNextClause(NULL,NULL,1) != 0); return(0); } #ifdef USE_PORTAUDIO speaking = 1; while(speaking) { // NOTE: if nanosleep() isn't recognised on your system, try replacing // this by sleep(1); #ifdef PLATFORM_WINDOWS Sleep(300); // 0.3s #else #ifdef USE_NANOSLEEP struct timespec period; struct timespec remaining; period.tv_sec = 0; period.tv_nsec = 300000000; // 0.3 sec nanosleep(&period,&remaining); #else sleep(1); #endif #endif if(SynthOnTimer() != 0) speaking = 0; } #else fprintf(stderr,"-w option must be used because the program was built 
without a sound interface\n"); #endif // USE_PORTAUDIO } if((f_trans != stdout) && (f_trans != stderr)) fclose(f_trans); // needed for WinCe return(0); }