summaryrefslogtreecommitdiff
path: root/navit/support/espeak/translate.h
diff options
context:
space:
mode:
Diffstat (limited to 'navit/support/espeak/translate.h')
-rw-r--r--[-rwxr-xr-x]navit/support/espeak/translate.h466
1 files changed, 337 insertions, 129 deletions
diff --git a/navit/support/espeak/translate.h b/navit/support/espeak/translate.h
index 0556bf280..fe25b191d 100755..100644
--- a/navit/support/espeak/translate.h
+++ b/navit/support/espeak/translate.h
@@ -1,5 +1,5 @@
/***************************************************************************
- * Copyright (C) 2005 to 2007 by Jonathan Duddington *
+ * Copyright (C) 2005 to 2014 by Jonathan Duddington *
* email: jonsd@users.sourceforge.net *
* *
* This program is free software; you can redistribute it and/or modify *
@@ -18,18 +18,21 @@
***************************************************************************/
-#define L(c1,c2) (c1<<8)+c2 // combine two characters into an integer for translator name
+#define L(c1,c2) (c1<<8)+c2 // combine two characters into an integer for translator name
#define CTRL_EMBEDDED 0x01 // control character at the start of an embedded command
#define REPLACED_E 'E' // 'e' replaced by silent e
-#define N_WORD_PHONEMES 160 // max phonemes in a word
+#define N_WORD_PHONEMES 200 // max phonemes in a word
#define N_WORD_BYTES 160 // max bytes for the UTF8 characters in a word
#define N_CLAUSE_WORDS 300 // max words in a clause
+#define N_TR_SOURCE 800 // the source text of a single clause (UTF8 bytes)
+
+
#define N_RULE_GROUP2 120 // max num of two-letter rule chains
#define N_HASH_DICT 1024
#define N_CHARSETS 20
-#define N_LETTER_GROUPS 26
+#define N_LETTER_GROUPS 95 // maximum is 127-32
/* dictionary flags, word 1 */
@@ -37,25 +40,25 @@
#define FLAG_SKIPWORDS 0x80
#define FLAG_PREPAUSE 0x100
-#define FLAG_ONLY 0x200
-#define FLAG_ONLY_S 0x400
-#define BITNUM_FLAG_ONLY 9 // bit 9 is set
-#define BITNUM_FLAG_ONLY_S 10 // bit 10 is set
-
-#define FLAG_STRESS_END 0x800 /* full stress if at end of clause */
-#define FLAG_STRESS_END2 0x1000 /* full stress if at end of clause, or only followed by unstressed */
-#define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */
-#define FLAG_ATEND 0x4000 /* use this pronunciation if at end of clause */
-#define FLAG_SPELLWORD 0x8000 // re-translate the word as individual letters, separated by spaces
-
-#define FLAG_DOT 0x10000 /* ignore '.' after word (abbreviation) */
-#define FLAG_ABBREV 0x20000 // spell as letters, even with a vowel, OR use specified pronunciation rather than split into letters
-#define FLAG_STEM 0x40000 // must have a suffix
-
-#define FLAG_DOUBLING 0x80000 // doubles the following consonant
-#define FLAG_ALT_TRANS 0x100000 // language specific
-#define FLAG_ALT2_TRANS 0x200000 // language specific
-
+#define FLAG_STRESS_END 0x200 // full stress if at end of clause
+#define FLAG_STRESS_END2 0x400 // full stress if at end of clause, or only followed by unstressed
+#define FLAG_UNSTRESS_END 0x800 // reduce stress at end of clause
+#define FLAG_SPELLWORD 0x1000 // re-translate the word as individual letters, separated by spaces
+#define FLAG_ABBREV 0x2000 // spell as letters, even with a vowel, OR use specified pronunciation rather than split into letters
+#define FLAG_DOUBLING 0x4000 // doubles the following consonant
+
+#define BITNUM_FLAG_ALT 14 // one less than the bit number of FLAG_ALT_TRANS (which is bit 15)
+#define FLAG_ALT_TRANS 0x8000 // language specific
+#define FLAG_ALT2_TRANS 0x10000 // language specific
+#define FLAG_ALT3_TRANS 0x20000 // language specific
+#define FLAG_ALT4_TRANS 0x40000 // language specific
+#define FLAG_ALT5_TRANS 0x80000 // language specific
+#define FLAG_ALT6_TRANS 0x100000 // language specific
+
+#define FLAG_COMBINE 0x800000 // combine with the next word
+#define FLAG_ALLOW_DOT 0x01000000 // ignore '.' after word (abbreviation)
+#define FLAG_NEEDS_DOT 0x02000000 // only if the word is followed by a dot
+#define FLAG_WAS_UNPRONOUNCABLE 0x04000000 // the unpronounceable routine was used
#define FLAG_MAX3 0x08000000 // limit to 3 repeats
#define FLAG_PAUSE1 0x10000000 // shorter prepause
#define FLAG_TEXTMODE 0x20000000 // word translates to replacement text, not phonemes
@@ -75,10 +78,21 @@
#define FLAG_VERB_EXT 0x100 /* extend the 'verb follows' */
#define FLAG_CAPITAL 0x200 /* pronunciation if initial letter is upper case */
#define FLAG_ALLCAPS 0x400 // only if the word is all capitals
-#define BITNUM_FLAG_ALLCAPS 0x2a
#define FLAG_ACCENT 0x800 // character name is base-character name + accent name
#define FLAG_HYPHENATED 0x1000 // multiple-words, but needs hyphen between parts 1 and 2
+#define FLAG_SENTENCE 0x2000 // only if the clause is a sentence
+#define FLAG_ONLY 0x4000
+#define FLAG_ONLY_S 0x8000
+#define FLAG_STEM 0x10000 // must have a suffix
+#define FLAG_ATEND 0x20000 // use this pronunciation if at end of clause
+#define FLAG_ATSTART 0x40000 // use this pronunciation if at start of clause
+#define FLAG_NATIVE 0x80000 // not if we've switched translators
+#define FLAG_LOOKUP_SYMBOL 0x40000000 // to indicate called from Lookup()
+
+#define BITNUM_FLAG_ALLCAPS 0x2a
#define BITNUM_FLAG_HYPHENATED 0x2c
+#define BITNUM_FLAG_ONLY 0x2e
+#define BITNUM_FLAG_ONLY_S 0x2f
// wordflags, flags in source word
@@ -98,20 +112,37 @@
#define FLAG_DONT_SWITCH_TRANSLATOR 0x1000
#define FLAG_SUFFIX_REMOVED 0x2000
#define FLAG_HYPHEN_AFTER 0x4000
+#define FLAG_ORDINAL 0x8000 // passed to TranslateNumber() to indicate an ordinal number
+#define FLAG_HAS_DOT 0x10000 // dot after this word
+#define FLAG_COMMA_AFTER 0x20000 // comma after this word
+#define FLAG_MULTIPLE_SPACES 0x40000 // word is preceded by multiple spaces, newline, or tab
+#define FLAG_INDIVIDUAL_DIGITS 0x80000 // speak number as individual digits
+#define FLAG_DELETE_WORD 0x100000 // don't speak this word, it has been spoken as part of the previous word
+#define FLAG_CHAR_REPLACED 0x200000 // characters have been replaced by .replace in the *_rules
+#define FLAG_TRANSLATOR2 0x400000 // retranslating using a different language
+#define FLAG_PREFIX_REMOVED 0x800000 // a prefix has been removed from this word
+
+#define FLAG_SUFFIX_VOWEL 0x08000000 // remember an initial vowel from the suffix
+#define FLAG_NO_TRACE 0x10000000 // passed to TranslateRules() to suppress dictionary lookup printout
+#define FLAG_NO_PREFIX 0x20000000
+#define FLAG_UNPRON_TEST 0x80000000 // do unpronounceability test on the beginning of the word
-#define FLAG_NO_TRACE 0x10000 // passed to TranslateRules() to suppress dictionary lookup printout
-#define FLAG_NO_PREFIX 0x20000
// prefix/suffix flags (bits 8 to 14, bits 16 to 22) don't use 0x8000, 0x800000
#define SUFX_E 0x0100 // e may have been added
#define SUFX_I 0x0200 // y may have been changed to i
#define SUFX_P 0x0400 // prefix
#define SUFX_V 0x0800 // suffix means use the verb form pronunciation
-#define SUFX_D 0x1000 // previous letter may have been doubles
+#define SUFX_D 0x1000 // previous letter may have been doubled
#define SUFX_F 0x2000 // verb follows
#define SUFX_Q 0x4000 // don't retranslate
#define SUFX_T 0x10000 // don't affect the stress position in the stem
#define SUFX_B 0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P)
+#define SUFX_A 0x40000 // remember that the suffix starts with a vowel
+#define SUFX_M 0x80000 // bit 19, allow multiple suffixes
+
+#define SUFX_UNPRON 0x8000 // used to return $unpron flag from *_rules
+
#define FLAG_ALLOW_TEXTMODE 0x02 // allow dictionary to translate to text rather than phonemes
#define FLAG_SUFX 0x04
@@ -127,29 +158,36 @@
#define RULE_CONDITION 5 // followed by condition number (byte)
#define RULE_GROUP_START 6
#define RULE_GROUP_END 7
-#define RULE_LINENUM 8 // next 2 bytes give a line number, for debugging purposes
+#define RULE_PRE_ATSTART 8 // as RULE_PRE but also match with 'start of word'
+#define RULE_LINENUM 9 // next 2 bytes give a line number, for debugging purposes
#define RULE_SPACE 32 // ascii space
-#define RULE_SYLLABLE 9
-#define RULE_STRESSED 10
-#define RULE_DOUBLE 11
-#define RULE_INC_SCORE 12
-#define RULE_DEL_FWD 13
-#define RULE_ENDING 14
+#define RULE_SYLLABLE 21 // @
+#define RULE_STRESSED 10 // &
+#define RULE_DOUBLE 11 // %
+#define RULE_INC_SCORE 12 // +
+#define RULE_DEL_FWD 13 // #
+#define RULE_ENDING 14 // S
#define RULE_DIGIT 15 // D digit
#define RULE_NONALPHA 16 // Z non-alpha
#define RULE_LETTERGP 17 // A B C H F G Y letter group number
#define RULE_LETTERGP2 18 // L + letter group number
-#define RULE_CAPITAL 19 // word starts with a capital letter
+#define RULE_CAPITAL 19 // ! word starts with a capital letter
#define RULE_REPLACEMENTS 20 // section for character replacements
+#define RULE_SKIPCHARS 23 // J
#define RULE_NO_SUFFIX 24 // N
#define RULE_NOTVOWEL 25 // K
#define RULE_IFVERB 26 // V
-#define RULE_ALT1 28 // T word has $alt attribute
+#define RULE_DOLLAR 28 // $ commands
#define RULE_NOVOWELS 29 // X no vowels up to word boundary
#define RULE_SPELLING 31 // W while spelling letter-by-letter
#define RULE_LAST_RULE 31
+#define DOLLAR_UNPR 0x01
+#define DOLLAR_NOPREFIX 0x02
+#define DOLLAR_LIST 0x03
+
+
#define LETTERGP_A 0
#define LETTERGP_B 1
#define LETTERGP_C 2
@@ -161,30 +199,37 @@
// Punctuation types returned by ReadClause()
-// bits 0-7 pause x 10mS, bits 12-14 intonation type,
+// bits 0-11 pause x 10mS
+// bits 12-14 intonation type
+// bit 15= don't need space after the punctuation
// bit 19=sentence, bit 18=clause, bits 17=voice change
// bit 16 used to distinguish otherwise identical types
// bit 20= punctuation character can be inside a word (Armenian)
+// bit 21= speak the name of the punctuation character
+// bit 22= dot after the last word
+// bit 23= pause is x 320mS (not x 10mS)
+
#define CLAUSE_BIT_SENTENCE 0x80000
+#define CLAUSE_BIT_CLAUSE 0x40000
#define CLAUSE_BIT_VOICE 0x20000
+#define CLAUSE_BITS_INTONATION 0x7000
#define PUNCT_IN_WORD 0x100000
-
-#define CLAUSE_NONE 0 + 0x04000
-#define CLAUSE_PARAGRAPH 70 + 0x80000
-#define CLAUSE_EOF 35 + 0x90000
-#define CLAUSE_VOICE 0 + 0x24000
-#define CLAUSE_PERIOD 35 + 0x80000
-#define CLAUSE_COMMA 20 + 0x41000
-#define CLAUSE_SHORTCOMMA 4 + 0x41000
-#define CLAUSE_SHORTFALL 4 + 0x40000
-#define CLAUSE_QUESTION 35 + 0x82000
-#define CLAUSE_EXCLAMATION 40 + 0x83000
-#define CLAUSE_COLON 30 + 0x40000
-#ifdef PLATFORM_RISCOS
-#define CLAUSE_SEMICOLON 30 + 0x40000
-#else
-#define CLAUSE_SEMICOLON 30 + 0x41000
-#endif
+#define PUNCT_SAY_NAME 0x200000
+#define CLAUSE_DOT 0x400000
+#define CLAUSE_PAUSE_LONG 0x800000
+
+#define CLAUSE_NONE ( 0 + 0x04000)
+#define CLAUSE_PARAGRAPH (70 + 0x80000)
+#define CLAUSE_EOF (40 + 0x90000)
+#define CLAUSE_VOICE ( 0 + 0x24000)
+#define CLAUSE_PERIOD (40 + 0x80000)
+#define CLAUSE_COMMA (20 + 0x41000)
+#define CLAUSE_SHORTCOMMA ( 4 + 0x41000)
+#define CLAUSE_SHORTFALL ( 4 + 0x40000)
+#define CLAUSE_QUESTION (40 + 0x82000)
+#define CLAUSE_EXCLAMATION (45 + 0x83000)
+#define CLAUSE_COLON (30 + 0x40000)
+#define CLAUSE_SEMICOLON (30 + 0x41000)
#define SAYAS_CHARS 0x12
#define SAYAS_GLYPHS 0x13
@@ -194,6 +239,7 @@
#define SAYAS_DIGITS1 0xc1
#define CHAR_EMPHASIS 0x0530 // this is an unused character code
+#define CHAR_COMMA_BREAK 0x0557 // unused character code
// Rule:
// [4] [match] [1 pre] [2 post] [3 phonemes] 0
@@ -203,32 +249,23 @@
typedef const char * constcharptr;
typedef struct {
- int points;
+ int points;
const char *phonemes;
- int end_type;
+ int end_type;
char *del_fwd;
} MatchRecord;
-
+
// used to mark words with the source[] buffer
typedef struct{
+ unsigned int flags;
unsigned short start;
- unsigned short sourceix;
- unsigned short flags;
unsigned char pre_pause;
unsigned char wmark;
+ unsigned short sourceix;
unsigned char length;
} WORD_TAB;
-// a clause translated into phoneme codes (first stage)
-typedef struct {
- unsigned char phcode;
- unsigned char stress;
- unsigned char tone_number;
- unsigned char synthflags;
- unsigned short sourceix;
-} PHONEME_LIST2;
-
typedef struct {
int type;
@@ -239,8 +276,25 @@ extern PARAM_STACK param_stack[];
extern const int param_defaults[N_SPEECH_PARAM];
-
-#define N_LOPTS 16
+typedef struct {
+ const char *name;
+ int offset;
+ unsigned short range_min, range_max;
+ int language;
+ int flags;
+} ALPHABET;
+
+extern ALPHABET alphabets[];
+extern ALPHABET *current_alphabet;
+// alphabet flags
+#define AL_DONT_NAME 0x01 // don't speak the alphabet name
+#define AL_NOT_LETTERS 0x02 // don't use the language for speaking letters
+#define AL_WORDS 0x04 // use the language to speak words
+#define AL_NOT_CODE 0x08 // don't speak the character code
+#define AL_NO_SYMBOL 0x10 // don't repeat "symbol" or "character"
+
+
+#define N_LOPTS 21
#define LOPT_DIERESES 1
// 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables
// bit 4=0, if stress < 4, bit 4=1, if not the highest stress in the word
@@ -250,9 +304,12 @@ extern const int param_defaults[N_SPEECH_PARAM];
#define LOPT_PREFIXES 3
// non-zero, change voiced/unoiced to match last consonant in a cluster
- // bit 1=LANG=ru, don't propagate over [v]
+ // bit 0=use regressive voicing
+ // bit 1=LANG=cz,bg don't propagate over [v]
// bit 2=don't propagate acress word boundaries
// bit 3=LANG=pl, propagate over liquids and nasals
+ // bit 4=LANG=cz,sk don't propagate to [v]
+ // bit 8=devoice word-final consonants
#define LOPT_REGRESSIVE_VOICING 4
// 0=default, 1=no check, other allow this character as an extra initial letter (default is 's')
@@ -264,7 +321,7 @@ extern const int param_defaults[N_SPEECH_PARAM];
// increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels
#define LOPT_SONORANT_MIN 7
- // don't break vowels at word boundary
+ // bit 0: don't break vowels at word boundary
#define LOPT_WORD_MERGE 8
// max. amplitude for vowel at the end of a clause
@@ -292,8 +349,33 @@ extern const int param_defaults[N_SPEECH_PARAM];
#define LOPT_IT_DOUBLING 14
// Call ApplySpecialAttributes() if $alt or $alt2 is set for a word
+ // bit 1: stressed syllable: $alt change [e],[o] to [E],[O], $alt2 change [E],[O] to [e],[o]
#define LOPT_ALT 15
+ // pause for bracket (default=4), pause when announcing bracket names (default=2)
+#define LOPT_BRACKET_PAUSE 16
+
+ // bit 1, don't break clause before announcing . ? !
+#define LOPT_ANNOUNCE_PUNCT 17
+
+ // recognize long vowels (0 = don't recognize)
+#define LOPT_LONG_VOWEL_THRESHOLD 18
+
+ // bit 0: Don't allow suffixes if there is no previous syllable
+#define LOPT_SUFFIX 19
+
+ // bit 0 Apostrophe at start of word is part of the word
+ // bit 1 Apostrophe at end of word is part of the word
+#define LOPT_APOSTROPHE 20
+
+
+// stress_rule
+#define STRESSPOSN_1L 0 // 1st syllable
+#define STRESSPOSN_2L 1 // 2nd syllable
+#define STRESSPOSN_2R 2 // penultimate
+#define STRESSPOSN_1R 3 // final syllable
+#define STRESSPOSN_3R 4 // antepenultimate
+
typedef struct {
// bits0-2 separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phonemme)
@@ -304,34 +386,99 @@ typedef struct {
int vowel_pause;
int stress_rule; // 1=first syllable, 2=penultimate, 3=last
-// bit0=don't stress monosyllables, except at end of clause
+#define S_NO_DIM 0x02
+#define S_FINAL_DIM 0x04
+#define S_FINAL_DIM_ONLY 0x06
// bit1=don't set diminished stress,
// bit2=mark unstressed final syllables as diminished
+
+// bit3=set consecutive unstressed syllables in unstressed words to diminished, but not in stressed words
+
+#define S_FINAL_NO_2 0x10
// bit4=don't allow secondary stress on last syllable
+
+#define S_NO_AUTO_2 0x20
// bit5-don't use automatic secondary stress
+
+#define S_2_TO_HEAVY 0x40
// bit6=light syllable followed by heavy, move secondary stress to the heavy syllable. LANG=Finnish
+
+#define S_FIRST_PRIMARY 0x80
+// bit7=if more than one primary stress, reduce the subsequent primaries to secondary stress
+
+#define S_FINAL_STRESS_C 0x100
// bit8=stress last syllable if it doesn't end in a vowel
+
+#define S_FINAL_SPANISH 0x200
// bit9=stress last syllable if it doesn't end in vowel or "s" or "n" LANG=Spanish
+
+#define S_2_SYL_2 0x1000
// bit12= In a 2-syllable word, if one has primary stress then give the other secondary stress
+
+#define S_INITIAL_2 0x2000
// bit13= If there is only one syllable before the primary stress, give it a secondary stress
-// bit15= Give stress to the first unstressed syllable
-// bit16= Don't diminish consecutive syllables within a word.
+
+#define S_MID_DIM 0x10000
+// bit 16= Set (not first or last) syllables to diminished stress
+
+#define S_PRIORITY_STRESS 0x20000
// bit17= "priority" stress reduces other primary stress to "unstressed" not "secondary"
+
+#define S_EO_CLAUSE1 0x40000
// bit18= don't lengthen short vowels more than long vowels at end-of-clause
+
+#define S_FINAL_LONG 0x80000
// bit19=stress on final syllable if it has a long vowel, but previous syllable has a short vowel
- int stress_flags;
+
+#define S_HYPEN_UNSTRESS 0x100000
+// bit20= hyphenated words, 2nd part is unstressed
+
+#define S_NO_EOC_LENGTHEN 0x200000
+// bit21= don't lengthen vowels at end-of-clause
+
+// bit15= Give stress to the first unstressed syllable
+
+
+ int stress_flags;
int unstressed_wd1; // stress for $u word of 1 syllable
int unstressed_wd2; // stress for $u word of >1 syllable
int param[N_LOPTS];
+ int param2[N_LOPTS];
unsigned char *length_mods;
unsigned char *length_mods0;
-#define NUM_ROMAN 0x20000
-#define NUM_ROMAN_UC 0x40000
-#define NUM_NOPAUSE 0x80000
-#define NUM_ROMAN_AFTER 0x200000
-#define NUM_VIGESIMAL 0x400000
+#define NUM_THOUS_SPACE 0x4
+#define NUM_DECIMAL_COMMA 0x8
+#define NUM_SWAP_TENS 0x10
+#define NUM_AND_UNITS 0x20
+#define NUM_HUNDRED_AND 0x40
+#define NUM_SINGLE_AND 0x80
+#define NUM_SINGLE_STRESS 0x100
+#define NUM_SINGLE_VOWEL 0x200
+#define NUM_OMIT_1_HUNDRED 0x400
+#define NUM_1900 0x800
+#define NUM_ALLOW_SPACE 0x1000
+#define NUM_DFRACTION_1 0x2000
+#define NUM_DFRACTION_2 0x4000
+#define NUM_DFRACTION_3 0x6000
+#define NUM_DFRACTION_4 0x8000
+#define NUM_DFRACTION_5 0xa000
+#define NUM_DFRACTION_6 0xc000
+#define NUM_DFRACTION_7 0xe000 // lang=si, alternative form of number for decimal fraction digits (except the last)
+#define NUM_ORDINAL_DOT 0x10000
+#define NUM_NOPAUSE 0x20000
+#define NUM_AND_HUNDRED 0x40000
+#define NUM_THOUSAND_AND 0x80000
+#define NUM_VIGESIMAL 0x100000
+#define NUM_OMIT_1_THOUSAND 0x200000
+#define NUM_ZERO_HUNDRED 0x400000
+#define NUM_HUNDRED_AND_DIGIT 0x800000
+#define NUM_ROMAN 0x1000000
+#define NUM_ROMAN_CAPITALS 0x2000000
+#define NUM_ROMAN_AFTER 0x4000000
+#define NUM_ROMAN_ORDINAL 0x8000000
+#define NUM_SINGLE_STRESS_L 0x10000000
// bits0-1=which numbers routine to use.
// bit2= thousands separator must be space
@@ -346,35 +493,66 @@ typedef struct {
// bit11=say 19** as nineteen hundred
// bit12=allow space as thousands separator (in addition to langopts.thousands_sep)
// bits13-15 post-decimal-digits 0=single digits, 1=(LANG=it) 2=(LANG=pl) 3=(LANG=ro)
- // bit16=dot after number indicates ordinal
- // bit17=recognize roman numbers
- // bit18=Roman numbers only if upper case
- // bit19=don't add pause after a number
- // bit20='and' before hundreds
- // bit21= say "roman" after the number, not before
- // bit22= vigesimal number, if tens are not found
+
+ // bit16= dot after number indicates ordinal
+ // bit17= don't add pause after a number
+ // bit18= 'and' before hundreds
+ // bit19= 'and' after thousands if there are no hundreds
+ // bit20= vigesimal number, if tens are not found
+ // bit21= omit "one" before "thousand"
+ // bit22= say "zero" before hundred
+ // bit23= add "and" after hundreds and thousands, only if there are digits and no tens
+
+ // bit24= recognize roman numbers
+ // bit25= Roman numbers only if upper case
+ // bit26= say "roman" after the number, not before
+ // bit27= Roman numbers are ordinal numbers
+ // bit28= only one primary stress in tens+units (on the tens)
int numbers;
-#define NUM2_100000 0x800 // numbers for 100,000 and 10,000,000
-#define NUM2_100000a 0xc00 // numbers for 100,000 and 1,000,000
+#define NUM2_THOUSANDS_VAR1 0x40
+#define NUM2_THOUSANDS_VAR2 0x80
+#define NUM2_THOUSANDS_VAR3 0xc0
+#define NUM2_THOUSANDS_VAR4 0x100
+#define NUM2_THOUSANDS_VAR5 0x140
+
+#define NUM2_ORDINAL_NO_AND 0x800
+#define NUM2_MULTIPLE_ORDINAL 0x1000
+#define NUM2_NO_TEEN_ORDINALS 0x2000
+#define NUM2_MYRIADS 0x4000
+#define NUM2_ENGLISH_NUMERALS 0x8000
+#define NUM2_PERCENT_BEFORE 0x10000
+#define NUM2_OMIT_1_HUNDRED_ONLY 0x20000
+#define NUM2_ORDINAL_AND_THOUSANDS 0x40000
// bits 1-4 use variant form of numbers before thousands,millions,etc.
- // bit6=(LANG=pl) two forms of plural, M or MA
- // bit7=(LANG-ru) use MB for 1 thousand, million, etc
- // bit8=(LANG=cs,sk) two forms of plural, M or MA
+ // bits 6-8 use different forms of thousand, million, etc (M MA MB)
// bit9=(LANG=rw) say "thousand" and "million" before its number, not after
- // bit10=(LANG=sw) special word for 100,000 and 1,000,000
- // bit11=(LANG=hi) special word for 100,000 and 10,000,000
+ // bit11=(LANG=es,an) don't say 'and' between tens and units for ordinal numbers
+ // bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units
+ // bit13=(LANG=pt) don't use 11-19 numbers to make ordinals
+ // bit14=(LANG=ko) use myriads (groups of 4 digits) not thousands (groups of 3)
+ // bit15=(LANG=ne) speak (non-replaced) English numerals in English
+ // bit16=(LANG=si) say "%" before the number
+ // bit17=(LANG=ml) omit "one" before hundred only if there are no previous digits
+ // bit18=(LANG=ta) same variant for ordinals and thousands (#o = #a)
int numbers2;
+#define BREAK_THOUSANDS 0x49249248
+ int break_numbers; // bitmap of the digit positions at which to break the number into thousands, millions, etc (Hindi has 100,000 not 1,000,000)
int max_roman;
+ int min_roman;
int thousands_sep;
int decimal_sep;
+ int max_digits; // max number of digits which can be spoken as an integer number (rather than individual digits)
+ const char *ordinal_indicator; // UTF-8 string
+ const char *roman_suffix; // add this (ordinal) suffix to Roman numbers (LANG=an)
// bit 0, accent name before the letter name, bit 1 "capital" after letter name
int accents;
int tone_language; // 1=tone language
int intonation_group;
+ unsigned char tunes[6];
int long_stop; // extra mS pause for a lengthened stop
int phoneme_change; // TEST, change phonemes, after translation
char max_initial_consonants;
@@ -382,10 +560,21 @@ typedef struct {
char tone_numbers;
char ideographs; // treat as separate words
char textmode; // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled)
+ char dotless_i; // uses letter U+0131
int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2"
int listx; // compile *_listx after *list
const unsigned int *replace_chars; // characters to be substitutes
- const char *ascii_language; // switch to this language for Latin characters
+ char ascii_language[8]; // switch to this language for Latin characters
+ int our_alphabet; // offset for main alphabet (if not set in letter_bits_offset)
+ int alt_alphabet; // offset for another language to recognize
+ int alt_alphabet_lang; // language for the alt_alphabet
+ int max_lengthmod;
+ int lengthen_tonic; // lengthen the tonic syllable
+ int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character
+
+#define DICTDIALECT_EN_US 1 // bit number
+#define DICTDIALECT_ES_LA 2
+ int dict_dialect; // bitmap, use a dialect for foreign words
} LANGUAGE_OPTIONS;
@@ -401,30 +590,29 @@ typedef struct {
-#define NUM_SEP_DOT 0x0008 // . , for thousands and decimal separator
-#define NUM_SEP_SPACE 0x1000 // allow space as thousands separator (in addition to langopts.thousands_sep)
-#define NUM_DEC_IT 0x2000 // (LANG=it) speak post-decimal-point digits as a combined number not as single digits
-
-typedef struct Translator
-{//=============
+typedef struct
+{//===========
LANGUAGE_OPTIONS langopts;
int translator_name;
- int transpose_offset;
int transpose_max;
int transpose_min;
+ const char *transpose_map;
+ char dictionary_name[40];
- char phon_out[300];
char phonemes_repeat[20];
- int phonemes_repeat_count;
+ int phonemes_repeat_count;
+ int phoneme_tab_ix;
unsigned char stress_amps[8];
unsigned char stress_amps_r[8];
short stress_lengths[8];
int dict_condition; // conditional apply some pronunciation rules and dict.lookups
+ int dict_min_size;
const unsigned short *charset_a0; // unicodes for characters 0xa0 to oxff
const wchar_t *char_plus_apostrophe; // single chars + apostrophe treated as words
const wchar_t *punct_within_word; // allow these punctuation characters within words
+ const unsigned short *chars_ignore;
// holds properties of characters: vowel, consonant, etc for pronunciation rules
unsigned char letter_bits[256];
@@ -443,16 +631,17 @@ typedef struct Translator
// groups1 and groups2 are indexes into data_dictrules, set up by InitGroups()
// the two-letter rules for each letter must be consecutive in the language_rules source
-
+
char *groups1[256]; // translation rule lists, index by single letter
+ char *groups3[128]; // index by offset letter
char *groups2[N_RULE_GROUP2]; // translation rule lists, indexed by two-letter pairs
unsigned int groups2_name[N_RULE_GROUP2]; // the two letter pairs for groups2[]
int n_groups2; // number of groups2[] entries used
-
+
unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter
unsigned char groups2_start[256]; // index into groups2
-
-
+ const short *frequent_pairs; // list of frequent pairs of letters, for use in compressed *_list
+
int expect_verb;
int expect_past; // expect past tense
int expect_verb_s;
@@ -462,14 +651,15 @@ typedef struct Translator
int word_vowel_count; // number of vowels so far
int word_stressed_count; // number of vowels so far which could be stressed
-
+
int clause_upper_count; // number of upper case letters in the clause
int clause_lower_count; // number of lower case letters in the clause
int prepause_timeout;
int end_stressed_vowel; // word ends with stressed vowel
- int prev_dict_flags; // dictionary flags from previous word
-} Translator; // end of class Translator
+ int prev_dict_flags[2]; // dictionary flags from previous word
+ int clause_terminator;
+} Translator;
extern int option_tone2;
@@ -479,6 +669,7 @@ extern int option_tone_flags;
extern int option_waveout;
extern int option_quiet;
extern int option_phonemes;
+extern int option_mbrola_phonemes;
extern int option_phoneme_events;
extern int option_linelength; // treat lines shorter than this as end-of-clause
extern int option_multibyte;
@@ -513,8 +704,8 @@ extern char skip_marker[N_MARKER_LENGTH];
extern wchar_t option_punctlist[N_PUNCTLIST]; // which punctuation characters to announce
extern unsigned char punctuation_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];
-extern struct Translator *translator;
-extern struct Translator *translator2;
+extern Translator *translator;
+extern Translator *translator2;
extern const unsigned short *charsets[N_CHARSETS];
extern char dictionary_name[40];
extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands
@@ -524,15 +715,16 @@ extern int dictionary_skipwords;
extern int (* uri_callback)(int, const char *, const char *);
extern int (* phoneme_callback)(const char *);
-extern void SetLengthMods(struct Translator *tr, int value);
+extern void SetLengthMods(Translator *tr, int value);
void LoadConfig(void);
-int TransposeAlphabet(char *text, int offset, int min, int max);
+int TransposeAlphabet(Translator *tr, char *text);
int utf8_in(int *c, const char *buf);
int utf8_in2(int *c, const char *buf, int backwards);
int utf8_out(unsigned int c, char *buf);
int utf8_nbytes(const char *buf);
int lookupwchar(const unsigned short *list,int c);
+int lookupwchar2(const unsigned short *list,int c);
int Eof(void);
char *strchr_w(const char *s, int c);
int IsBracket(int c);
@@ -540,23 +732,35 @@ void InitNamedata(void);
void InitText(int flags);
void InitText2(void);
int IsDigit(unsigned int c);
+int IsDigit09(unsigned int c);
int IsAlpha(unsigned int c);
+int IsVowel(Translator *tr, int c);
+int IsSuperscript(int letter);
+int iswalpha2(int c);
int isspace2(unsigned int c);
+int iswlower2(int c);
+int iswupper2(int c);
int towlower2(unsigned int c);
-void GetTranslatedPhonemeString(char *phon_out, int n_phon_out);
+int towupper2(unsigned int c);
+const char *GetTranslatedPhonemeString(int phoneme_mode);
+const char *WordToString2(unsigned int word);
+ALPHABET *AlphabetFromChar(int c);
+ALPHABET *AlphabetFromName(const char *name);
-struct Translator *SelectTranslator(const char *name);
+Translator *SelectTranslator(const char *name);
int SetTranslator2(const char *name);
-void DeleteTranslator(struct Translator *tr);
-int Lookup(struct Translator *tr, const char *word, char *ph_out);
+void DeleteTranslator(Translator *tr);
+void ProcessLanguageOptions(LANGUAGE_OPTIONS *langopts);
+int Lookup(Translator *tr, const char *word, char *ph_out);
+int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out);
-int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, int wflags);
-int TranslateRoman(Translator *tr, char *word, char *ph_out);
+int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control);
+int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab);
void ChangeWordStress(Translator *tr, char *word, int new_stress);
void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars);
-int TranslateLetter(Translator *tr, char *letter, char *phonemes, int control, int word_length);
-void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf);
+int TranslateLetter(Translator *tr, char *letter, char *phonemes, int control);
+void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf, int control);
void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf);
int LoadDictionary(Translator *tr, const char *name, int no_error);
@@ -564,7 +768,6 @@ int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *f
void MakePhonemeList(Translator *tr, int post_pause, int new_sentence);
int ChangePhonemes_ru(Translator *tr, PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch);
-void ApplySpecialAttribute(Translator *tr, char *phonemes, int dict_flags);
void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
void AppendPhonemes(Translator *tr, char *string, int size, const char *ph);
@@ -572,13 +775,18 @@ void CalcLengths(Translator *tr);
void CalcPitches(Translator *tr, int clause_tone);
int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy);
-int Unpronouncable(Translator *tr, char *word);
+int Unpronouncable(Translator *tr, char *word, int posn);
void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int prev_stress);
int TranslateRules(Translator *tr, char *p, char *phonemes, int size, char *end_phonemes, int end_flags, unsigned int *dict_flags);
-int TranslateWord(Translator *tr, char *word1, int next_pause, WORD_TAB *wtab);
+int TranslateWord(Translator *tr, char *word1, int next_pause, WORD_TAB *wtab, char *word_out);
void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *tone, char **voice_change);
-int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type);
+int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change);
-void SetVoiceStack(espeak_VOICE *v);
+void SetVoiceStack(espeak_VOICE *v, const char *variant_name);
+void InterpretPhoneme(Translator *tr, int control, PHONEME_LIST *plist, PHONEME_DATA *phdata, WORD_PH_DATA *worddata);
+void InterpretPhoneme2(int phcode, PHONEME_DATA *phdata);
+char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags);
extern FILE *f_trans; // for logging
+extern FILE *f_logespeak;
+extern int logging_type; // from config file