/***************************************************************************
* Copyright (C) 2005 to 2007 by Jonathan Duddington *
* email: jonsd@users.sourceforge.net *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, see: *
* <http://www.gnu.org/licenses/>. *
***************************************************************************/
#include "StdAfx.h"
#include
#include
#include
#include "speak_lib.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"
#include "voice.h"
#include "translate.h"
// Not defined in this part of the file.  SetParameter() calls it after
// storing a new value in embedded_value[EMBED_A]; the return value is not
// used here.
extern int GetAmplitude(void);
// Convert from words-per-minute to the internal speed factor.
// Indexed by (wpm - 80) for wpm in the range 80..369; SetSpeed() clamps the
// rate to that range before indexing.  The comment at the end of each row
// is the wpm value of the row's first entry.
// The table is only ever read, so it is const-qualified.
static const unsigned char speed_lookup[290] = {
	250, 246, 243, 239, 236, // 80
	233, 229, 226, 223, 220, // 85
	217, 214, 211, 208, 205, // 90
	202, 197, 194, 192, 190, // 95
	187, 185, 183, 180, 178, // 100
	176, 174, 172, 170, 168, // 105
	166, 164, 161, 159, 158, // 110
	156, 154, 152, 150, 148, // 115
	146, 145, 143, 141, 137, // 120
	136, 135, 133, 132, 131, // 125
	129, 128, 127, 126, 125, // 130
	124, 122, 121, 120, 119, // 135
	117, 116, 115, 114, 113, // 140
	112, 111, 110, 108, 107, // 145
	106, 105, 104, 103, 102, // 150
	101, 100, 99, 98, 97, // 155
	96, 95, 93, 92, 92, // 160
	91, 90, 89, 89, 88, // 165
	87, 87, 86, 85, 85, // 170
	84, 83, 83, 82, 81, // 175
	80, 80, 79, 78, 78, // 180
	77, 76, 76, 75, 73, // 185
	72, 72, 71, 71, 70, // 190
	70, 69, 69, 68, 67, // 195
	67, 66, 66, 65, 65, // 200
	64, 64, 63, 63, 62, // 205
	62, 61, 60, 60, 59, // 210
	59, 58, 58, 57, 57, // 215
	56, 56, 55, 55, 55, // 220
	54, 54, 53, 53, 52, // 225
	52, 51, 51, 50, 50, // 230
	49, 49, 49, 48, 48, // 235
	47, 47, 46, 46, 46, // 240
	45, 45, 44, 44, 43, // 245
	43, 43, 42, 42, 41, // 250
	41, 41, 40, 40, 39, // 255
	39, 39, 38, 38, 38, // 260
	37, 37, 37, 36, 36, // 265
	35, 35, 35, 34, 34, // 270
	34, 33, 33, 33, 32, // 275
	32, 32, 32, 31, 31, // 280
	31, 30, 30, 30, 29, // 285
	29, 29, 29, 28, 28, // 290
	28, 28, 27, 27, 27, // 295
	26, 26, 26, 26, 25, // 300
	25, 25, 22, 22, 22, // 305
	22, 22, 22, 22, 22, // 310
	21, 21, 21, 21, 21, // 315
	21, 20, 20, 20, 20, // 320
	20, 15, 15, 15, 15, // 325
	15, 15, 15, 15, 16, // 330
	16, 16, 16, 15, 15, // 335
	15, 15, 15, 15, 15, // 340
	15, 17, 17, 16, 16, // 345
	15, 15, 14, 14, 13, // 350
	13, 12, 12, 11, 11, // 355
	10, 10, 9, 8, 8, // 360
	7, 6, 5, 5, 4, // 365
};
// speed_factor2 values for speeds 370 to 390 wpm, indexed by (wpm - 370).
// SetSpeed() limits the rate to 390 before indexing.
// The table is only ever read, so it is const-qualified.
static const unsigned char faster[] = {
	114,112,110,109,107,105,104,102,100,98, // 370-379
	96,94,92,90,88,85,83,80,78,75,72 }; //380-390
// Vowel length scaling factors.  SetSpeed() recalculates them when bit 0 of
// its control argument is set; CalcLengths() multiplies a vowel's length
// modifier by one of them, chosen by how many more syllables follow the
// vowel within the same word.
static int speed1 = 130;   // no more syllables follow (also assigned directly as the length of some liquids)
static int speed2 = 121;   // exactly one more syllable follows
static int speed3 = 118;   // two or more syllables follow
// Recalculate the speed factors from the current embedded speed setting.
//   control bit 0:  update speed1..speed3, the per-syllable-position
//                   factors read by CalcLengths()
//   control bit 1:  update speed.speed_factor1 and speed.speed_factor2,
//                   which are used by the synthesis code
//   control == 2:   read the rate from embedded_value[EMBED_S2] instead of
//                   embedded_value[EMBED_S]
// speed.min_sample_len and speed.speed_factor3 are reset on every call, and
// the voice's "fast settings" are applied whenever the requested rate is
// 370 wpm or more, regardless of the control bits.
void SetSpeed(int control)
{//=======================
	const int update_lengths = control & 1;
	const int update_synth = control & 2;
	int requested;   // rate as requested, in words per minute
	int clamped;     // rate limited to the range covered by speed_lookup[]
	int base;        // table value for the clamped rate
	int scaled;      // base adjusted by the voice's speedf1

	requested = (control == 2) ? embedded_value[EMBED_S2] : embedded_value[EMBED_S];

	clamped = requested;
	if(clamped < 80)
		clamped = 80;
	else if(clamped > 369)
		clamped = 369;

	base = speed_lookup[clamped - 80];

	if(update_lengths)
	{
		// speed factors for different syllable positions within a word,
		// used in CalcLengths()
		speed1 = (base * voice->speedf1)/256;
		speed2 = (base * voice->speedf2)/256;
		speed3 = (base * voice->speedf3)/256;
	}

	if(update_synth)
	{
		// these are used in the synthesis file
		scaled = (base * voice->speedf1)/256;

		// full speed adjustment, used for pause length
		speed.speed_factor1 = (256 * scaled)/115;
		if(speed.speed_factor1 < 15)
			speed.speed_factor1 = 15;

		// reduced speed adjustment, used for playing recorded sounds
		if(requested > 369)
		{
			// above the range of speed_lookup[]: take the value from faster[]
			int fast_wpm = (requested > 390) ? 390 : requested;
			speed.speed_factor2 = faster[fast_wpm - 370];
		}
		else if(clamped >= 170)
		{
			speed.speed_factor2 = 110 + (150*scaled)/128;
		}
		else
		{
			speed.speed_factor2 = 128 + (128*scaled)/130;   // = 215 at 170 wpm
		}
	}

	speed.min_sample_len = 450;
	speed.speed_factor3 = 110;   // controls the effect of FRFLAG_LEN_MOD reduce length change

	if(requested >= 370)
	{
		// TESTING
		// use experimental fast settings if they have been specified in the Voice
		if(speed.fast_settings[0] > 0)
			speed.speed_factor1 = speed.fast_settings[0];

		if(speed.fast_settings[1] > 0)
			speed.speed_factor2 = speed.fast_settings[1];

		if(speed.fast_settings[2] > 0)
			speed.speed_factor3 = speed.fast_settings[2];
	}
}   // end of SetSpeed
#ifdef deleted
// Compiled only when the macro `deleted` is defined.
// Sets option_amplitude from a 21-entry table for amp in the range 0..20;
// any other value of amp is ignored.
void SetAmplitude(int amp)
{//=======================
	static unsigned char amp_table[] = {0,5,6,7,9,11,14,17,21,26,32,38,44,50,56,63,70,77,84,91,100};

	if((amp < 0) || (amp > 20))
		return;

	option_amplitude = (amp_table[amp] * 480)/256;
}
#endif
// Set one speech parameter and apply whatever side effect it has.
//   parameter  parameter number (espeakRATE, espeakVOLUME, espeakPITCH, ...)
//   value      the new setting, or a percentage change if relative is set
//   relative   0 = value is absolute
//              non-zero = value is a percentage adjustment applied to
//              param_defaults[parameter]; this only happens for
//              parameter < 5, other parameters use value unchanged
// The resulting value is stored in param_stack[0] before any of the
// per-parameter range limits below are applied.
void SetParameter(int parameter, int value, int relative)
{//======================================================
	int setting = value;
	int base;
	int group;

	if(relative && (parameter < 5))
	{
		base = param_defaults[parameter];
		setting = base + (base * value)/100;
	}

	param_stack[0].parameter[parameter] = setting;

	switch(parameter)
	{
	case espeakRATE:
		// both embedded speed values follow the rate; recalculate all speed factors
		embedded_value[EMBED_S] = setting;
		embedded_value[EMBED_S2] = setting;
		SetSpeed(3);
		break;

	case espeakVOLUME:
		embedded_value[EMBED_A] = setting;
		GetAmplitude();
		break;

	case espeakPITCH:
		// limited to 0..99
		if(setting > 99)
			setting = 99;
		else if(setting < 0)
			setting = 0;
		embedded_value[EMBED_P] = setting;
		break;

	case espeakRANGE:
		// only the upper limit is applied here
		embedded_value[EMBED_R] = (setting > 99) ? 99 : setting;
		break;

	case espeakLINELENGTH:
		option_linelength = setting;
		break;

	case espeakWORDGAP:
		option_wordgap = setting;
		break;

	case espeakINTONATION:
		// a non-zero low byte selects the translator's intonation group;
		// the whole value is kept as the tone flags
		group = setting & 0xff;
		if(group != 0)
			translator->langopts.intonation_group = group;
		option_tone_flags = setting;
		break;

	default:
		break;
	}
}   // end of SetParameter
// Process the embedded commands that apply at the current point in the text.
// Reads entries from embedded_list[] starting at *embix and advances *embix
// past each one read; the entry with bit 0x80 set is the last one consumed.
// Only speed commands (low 5 bits equal to EMBED_S) are acted on: the new
// value is applied with SetEmbedded() and the syllable speed factors are
// then recalculated with SetSpeed(1).
static void DoEmbedded2(int *embix)
{//================================
	unsigned int cmd;

	for(;;)
	{
		cmd = embedded_list[*embix];
		(*embix)++;

		if((cmd & 0x1f) == EMBED_S)
		{
			// speed
			SetEmbedded(cmd & 0x7f, cmd >> 8);   // adjusts embedded_value[EMBED_S]
			SetSpeed(1);
		}

		if((cmd & 0x80) != 0)
			break;
	}
}
// Calculate length, pre-pause, amplitude and pitch settings for the entries
// of phoneme_list[], from index 1 upwards.
//   tr   translator whose language options (word_gap, long_stop,
//        length_mods, length_mods0, stress_flags, param[]) and stress tables
//        (stress_amps, stress_lengths) control the calculation
//
// For each phoneme the code looks at its neighbours (prev, next, next2,
// next3) and, depending on the phoneme type, sets p->prepause, p->length,
// p->amp, p->pitch1/pitch2 and p->env.  Vowels additionally set the pitch,
// length and amplitude of a preceding sonorant or voiced consonant, and
// adjust SFLAG_SEQCONTINUE on the following phoneme.
//
// The function reads entries beyond the current index (up to ix+4, and
// forward until an entry with newword set), so it relies on phoneme_list[]
// having valid entries after the last phoneme.
void CalcLengths(Translator *tr)
{//==============================
	int ix;
	int ix2;
	PHONEME_LIST *prev;
	PHONEME_LIST *next;
	PHONEME_LIST *next2;
	PHONEME_LIST *next3;
	PHONEME_LIST *p;
	PHONEME_LIST *p2;

	int stress;
	int type;
	// static: the count from the most recent vowel is still visible when the
	// phSTOP case reads it, including across calls
	static int more_syllables=0;
	int pre_sonorant=0;
	int pre_voiced=0;
	int last_pitch = 0;
	int pitch_start;
	int length_mod;
	int len;
	int env2;
	int end_of_clause;
	int embedded_ix = 0;
	int min_drop;
	int emphasized;
	int tone_mod;
	unsigned char *pitch_env=NULL;

	// NOTE(review): this loop header and the prev/p/stress assignments were
	// truncated in this copy of the file and have been restored from the way
	// the body uses them - confirm against the upstream source.
	for(ix=1; ix<n_phoneme_list; ix++)
	{
		prev = &phoneme_list[ix-1];
		p = &phoneme_list[ix];
		stress = p->stresslevel & 0x7;
		emphasized = p->stresslevel & 0x8;

		next = &phoneme_list[ix+1];

		if(p->synthflags & SFLAG_EMBEDDED)
		{
			// may change speed1..speed3 for the phonemes that follow
			DoEmbedded2(&embedded_ix);
		}

		type = p->type;
		if(p->synthflags & SFLAG_SYLLABLE)
			type = phVOWEL;   // syllabic consonant, handled by the vowel case

		switch(type)
		{
		case phPAUSE:
			last_pitch = 0;
			break;

		case phSTOP:
			last_pitch = 0;
			if(prev->type == phFRICATIVE)
				p->prepause = 20;
			else
			if((more_syllables > 0) || (stress < 4))
				p->prepause = 40;
			else
				p->prepause = 60;

			if(prev->type == phSTOP)
				p->prepause = 60;

			if((tr->langopts.word_gap & 0x10) && (p->newword))
				p->prepause = 60;

			if(p->ph->phflags & phLENGTHENSTOP)
				p->prepause += 30;

			if(p->synthflags & SFLAG_LENGTHEN)
				p->prepause += tr->langopts.long_stop;
			break;

		case phVFRICATIVE:
			if(next->type==phVOWEL)
			{
				pre_voiced = 1;
			}    // drop through
		case phFRICATIVE:
			if(p->newword)
				p->prepause = 15;

			if(next->type==phPAUSE && prev->type==phNASAL && !(p->ph->phflags&phFORTIS))
				p->prepause = 25;

			if(prev->ph->phflags & phBRKAFTER)
				p->prepause = 30;

			if((p->ph->phflags & phSIBILANT) && next->type==phSTOP && !next->newword)
			{
				if(prev->type == phVOWEL)
					p->length = 200;      // ?? should do this if it's from a prefix
				else
					p->length = 150;
			}
			else
				p->length = 256;

			if((tr->langopts.word_gap & 0x10) && (p->newword))
				p->prepause = 30;

			break;

		case phVSTOP:
			if(prev->type==phVFRICATIVE || prev->type==phFRICATIVE || (prev->ph->phflags & phSIBILANT) || (prev->type == phLIQUID))
				p->prepause = 30;

			if(next->type==phVOWEL || next->type==phLIQUID)
			{
				if((next->type==phVOWEL) || !next->newword)
					pre_voiced = 1;

				p->prepause = 40;

				if((prev->type == phPAUSE) || (prev->type == phVOWEL)) // || (prev->ph->mnemonic == ('/'*256+'r')))
					p->prepause = 0;
				else
				if(p->newword==0)
				{
					if(prev->type==phLIQUID)
						p->prepause = 20;
					if(prev->type==phNASAL)
						p->prepause = 12;

					if(prev->type==phSTOP && !(prev->ph->phflags & phFORTIS))
						p->prepause = 0;
				}
			}
			if((tr->langopts.word_gap & 0x10) && (p->newword) && (p->prepause < 20))
				p->prepause = 20;

			break;

		case phLIQUID:
		case phNASAL:
			p->amp = tr->stress_amps[1];  // unless changed later
			p->length = 256;  //  TEMPORARY
			min_drop = 0;

			if(p->newword)
			{
				if(prev->type==phLIQUID)
					p->prepause = 25;
				if(prev->type==phVOWEL)
					p->prepause = 12;
			}

			if(next->type==phVOWEL)
			{
				// the following vowel will set this phoneme's pitch, length and amp
				pre_sonorant = 1;
			}
			else
			if((prev->type==phVOWEL) || (prev->type == phLIQUID))
			{
				// follows a vowel or liquid: continue from the previous pitch with a fall
				p->length = prev->length;
				p->pitch2 = last_pitch;
				if(p->pitch2 < 7)
					p->pitch2 = 7;
				p->pitch1 = p->pitch2 - 8;
				p->env = PITCHfall;
				pre_voiced = 0;

				if(p->type == phLIQUID)
				{
					p->length = speed1;
					//p->pitch1 = p->pitch2 - 20;   // post vocalic [r/]
				}

				if(next->type == phVSTOP)
				{
					p->length = (p->length * 160)/100;
				}
				if(next->type == phVFRICATIVE)
				{
					p->length = (p->length * 120)/100;
				}
			}
			else
			{
				p->pitch2 = last_pitch;

				// take the pitch from the next vowel in the list, if there is one
				// NOTE(review): this loop header and the phVOWEL test were
				// truncated in this copy of the file and have been restored
				// from the surrounding braces - confirm against the upstream source.
				for(ix2=ix; ix2<n_phoneme_list; ix2++)
				{
					if(phoneme_list[ix2].type == phVOWEL)
					{
						p->pitch2 = phoneme_list[ix2].pitch2;
						break;
					}
				}
				p->pitch1 = p->pitch2-8;
				p->env = PITCHfall;
				pre_voiced = 0;
			}
			break;

		case phVOWEL:
			min_drop = 0;
			next2 = &phoneme_list[ix+2];
			next3 = &phoneme_list[ix+3];

			if(stress > 7) stress = 7;

			if(pre_sonorant)
				p->amp = tr->stress_amps[stress]-1;
			else
				p->amp = tr->stress_amps[stress];

			if(emphasized)
				p->amp = 25;

			if(ix >= (n_phoneme_list-3))
			{
				// last phoneme of a clause, limit its amplitude
				if(p->amp > tr->langopts.param[LOPT_MAXAMP_EOC])
					p->amp = tr->langopts.param[LOPT_MAXAMP_EOC];
			}

			// is the last syllable of a word ?
			// scan forward to the start of the next word, counting syllabic vowels
			more_syllables=0;
			end_of_clause = 0;
			for(p2 = p+1; p2->newword== 0; p2++)
			{
				if((p2->type == phVOWEL) && !(p2->ph->phflags & phNONSYLLABIC))
					more_syllables++;

				if(p2->ph->code == phonPAUSE_CLAUSE)
					end_of_clause = 2;
			}
			if(p2->ph->code == phonPAUSE_CLAUSE)
				end_of_clause = 2;

			if((p2->newword & 2) && (more_syllables==0))
			{
				end_of_clause = 2;
			}

			// calc length modifier
			if((next->ph->code == phonPAUSE_VSHORT) && (next2->type == phPAUSE))
			{
				// if PAUSE_VSHORT is followed by a pause, then use that
				next = next2;
				next2 = next3;
				next3 = &phoneme_list[ix+4];
			}

			if(more_syllables==0)
			{
				// last syllable of the word: use the length_mods0 table
				len = tr->langopts.length_mods0[next2->ph->length_mod *10+ next->ph->length_mod];

				if((next->newword) && (tr->langopts.word_gap & 0x20))
				{
					// consider as a pause + first phoneme of the next word
					length_mod = (len + tr->langopts.length_mods0[next->ph->length_mod *10+ 1])/2;
				}
				else
					length_mod = len;
			}
			else
			{
				length_mod = tr->langopts.length_mods[next2->ph->length_mod *10+ next->ph->length_mod];

				if((next->type == phNASAL) && (next2->type == phSTOP || next2->type == phVSTOP) && (next3->ph->phflags & phFORTIS))
					length_mod -= 15;
			}

			// scale by the speed factor for this syllable position
			if(more_syllables==0)
				length_mod *= speed1;
			else
			if(more_syllables==1)
				length_mod *= speed2;
			else
				length_mod *= speed3;

			length_mod = length_mod / 128;

			if(length_mod < 8)
				length_mod = 8;     // restrict how much lengths can be reduced

			if(stress >= 7)
			{
				// tonic syllable, include a constant component so it doesn't decrease directly with speed
				length_mod += 20;
				if(emphasized)
					length_mod += 10;
			}
			else
			if(emphasized)
			{
				length_mod += 20;
			}

			// a zero entry in stress_lengths falls back to the entry for stress level 6
			if((len = tr->stress_lengths[stress]) == 0)
				len = tr->stress_lengths[6];

			length_mod = (length_mod * len)/128;

			if(p->tone_ph != 0)
			{
				if((tone_mod = phoneme_tab[p->tone_ph]->std_length) > 0)
				{
					// a tone phoneme specifies a percentage change to the length
					length_mod = (length_mod * tone_mod) / 100;
				}
			}

			if(end_of_clause == 2)
			{
				// this is the last syllable in the clause, lengthen it - more for short vowels
				len = p->ph->std_length;
				if(tr->langopts.stress_flags & 0x40000)
					len=200;  // don't lengthen short vowels more than long vowels at end-of-clause
				length_mod = length_mod * (256 + (280 - len)/3)/256;
			}

			if(p->type != phVOWEL)
			{
				length_mod = 256;   // syllabic consonant
				min_drop = 8;
			}
			p->length = length_mod;

			// pre-vocalic part
			// set last-pitch
			env2 = p->env;
			if(env2 > 1) env2++;   // version for use with preceding semi-vowel

			if(p->tone_ph != 0)
			{
				pitch_env = LookupEnvelope(phoneme_tab[p->tone_ph]->spect);
			}
			else
			{
				pitch_env = envelope_data[env2];
			}

			// pitch at the start of the vowel, from the first point of the envelope
			pitch_start = p->pitch1 + ((p->pitch2-p->pitch1)*pitch_env[0])/256;

			if(pre_sonorant || pre_voiced)
			{
				// set pitch for pre-vocalic part
				if(pitch_start == 1024)
					last_pitch = pitch_start;    // pitch is not set

				if(pitch_start - last_pitch > 8)   // was 9
					last_pitch = pitch_start - 8;

				prev->pitch1 = last_pitch;
				prev->pitch2 = pitch_start;
				if(last_pitch < pitch_start)
				{
					prev->env = PITCHrise;
					p->env = env2;
				}
				else
				{
					prev->env = PITCHfall;
				}

				prev->length = length_mod;

				prev->amp = p->amp;
				if((prev->type != phLIQUID) && (prev->amp > 18))
					prev->amp = 18;
			}

			// vowel & post-vocalic part
			next->synthflags &= ~SFLAG_SEQCONTINUE;
			if(next->type == phNASAL && next2->type != phVOWEL)
				next->synthflags |= SFLAG_SEQCONTINUE;

			if(next->type == phLIQUID)
			{
				next->synthflags |= SFLAG_SEQCONTINUE;

				if(next2->type == phVOWEL)
				{
					next->synthflags &= ~SFLAG_SEQCONTINUE;
				}

				if(next2->type != phVOWEL)
				{
					if(next->ph->mnemonic == ('/'*256+'r'))
					{
						next->synthflags &= ~SFLAG_SEQCONTINUE;
						// min_drop = 15;
					}
				}
			}

			// enforce a minimum pitch fall (only non-zero for syllabic consonants)
			if((min_drop > 0) && ((p->pitch2 - p->pitch1) < min_drop))
			{
				p->pitch1 = p->pitch2 - min_drop;
				if(p->pitch1 < 0)
					p->pitch1 = 0;
			}

			// pitch at the end of the vowel, from the last point of its envelope
			last_pitch = p->pitch1 + ((p->pitch2-p->pitch1)*envelope_data[p->env][127])/256;
			pre_sonorant = 0;
			pre_voiced = 0;
			break;
		}
	}
}   // end of CalcLengths