1 files changed, 785 insertions, 248 deletions
diff --git a/navit/support/espeak/readclause.c b/navit/support/espeak/readclause.c
index b8d302a27..172ebad15 100644
--- a/navit/support/espeak/readclause.c
+++ b/navit/support/espeak/readclause.c
@@ -1,5 +1,5 @@
 /***************************************************************************
- *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
+ *   Copyright (C) 2005 to 2014 by Jonathan Duddington                     *
  *   email: jonsd@users.sourceforge.net                                    *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
@@ -41,7 +41,7 @@
 #include <locale.h>
 #define N_XML_BUF   256
 
-#define double(x) ((double)(x))
+
 static const char *xmlbase = "";    // base URL from <speak>
 
 static int namedata_ix=0;
@@ -58,20 +58,21 @@ static const char *ungot_word = NULL;
 static int end_of_input;
 
 static int ignore_text=0;   // set during <sub> ... </sub>  to ignore text which has been replaced by an alias
+static int audio_text=0;    // set during <audio> ... </audio>
 static int clear_skipping_text = 0;  // next clause should clear the skipping_text flag
 int count_characters = 0;
 static int sayas_mode;
+static int sayas_start;
 static int ssml_ignore_l_angle = 0;
 
-static const char *punct_stop = ".:!?";    // pitch fall if followed by space
-static const char *punct_close = ")]}>;'\"";  // always pitch fall unless followed by alnum
-
 // alter tone for announce punctuation or capitals
-static const char *tone_punct_on = "\0016T";  // add reverberation, lower pitch
-static const char *tone_punct_off = "\001T";
+//static const char *tone_punct_on = "\0016T";  // add reverberation, lower pitch
+//static const char *tone_punct_off = "\001T\001P";
 
 // punctuations symbols that can end a clause
 static const unsigned short punct_chars[] = {',','.','?','!',':',';',
+  0x00a1,  // inverted exclamation
+  0x00bf,  // inverted question
   0x2013,  // en-dash
   0x2014,  // em-dash
   0x2026,  // elipsis
@@ -86,6 +87,15 @@ static const unsigned short punct_chars[] = {',','.','?','!',':',';',
   0x055e,  // Armenian question
   0x055b,  // Armenian emphasis mark
 
+  0x060c,  // Arabic ,
+  0x061b,  // Arabic ;
+  0x061f,  // Arabic ?
+  0x06d4,  // Arabic .
+
+  0x0df4,  // Singhalese Kunddaliya
+  0x0f0d,  // Tibet Shad
+  0x0f0e,
+
   0x1362,  // Ethiopic period
   0x1363,
   0x1364,
@@ -93,6 +103,7 @@ static const unsigned short punct_chars[] = {',','.','?','!',':',';',
   0x1366,
   0x1367,
   0x1368,
+  0x10fb,  // Georgian paragraph
 
   0x3001,  // ideograph comma
   0x3002,  // ideograph period
@@ -103,7 +114,7 @@ static const unsigned short punct_chars[] = {',','.','?','!',':',';',
   0xff1a,  // fullwidth colon
   0xff1b,  // fullwidth semicolon
   0xff1f,  // fullwidth question mark
-  
+
   0};
 
 
@@ -111,19 +122,30 @@ static const unsigned short punct_chars[] = {',','.','?','!',':',';',
 // bits 0-7 pause x 10mS, bits 12-14 intonation type, bit 15 don't need following space or bracket
 static const unsigned int punct_attributes [] = { 0,
   CLAUSE_COMMA, CLAUSE_PERIOD, CLAUSE_QUESTION, CLAUSE_EXCLAMATION, CLAUSE_COLON, CLAUSE_SEMICOLON,
+  CLAUSE_SEMICOLON | 0x8000,      // inverted exclamation
+  CLAUSE_SEMICOLON | 0x8000,      // inverted question
   CLAUSE_SEMICOLON,  // en-dash
   CLAUSE_SEMICOLON,  // em-dash
-  CLAUSE_SEMICOLON,  // elipsis
+  CLAUSE_SEMICOLON | PUNCT_SAY_NAME | 0x8000,      // elipsis
 
   CLAUSE_QUESTION,   // Greek question mark
   CLAUSE_SEMICOLON,  // Greek semicolon
-  CLAUSE_PERIOD+0x8000,     // Devanagari Danda (fullstop)
+  CLAUSE_PERIOD | 0x8000,     // Devanagari Danda (fullstop)
 
-  CLAUSE_PERIOD+0x8000,  // Armenian period
+  CLAUSE_PERIOD | 0x8000,  // Armenian period
   CLAUSE_COMMA,     // Armenian comma
-  CLAUSE_EXCLAMATION + PUNCT_IN_WORD,  // Armenian exclamation
-  CLAUSE_QUESTION + PUNCT_IN_WORD,  // Armenian question
-  CLAUSE_PERIOD + PUNCT_IN_WORD,  // Armenian emphasis mark
+  CLAUSE_EXCLAMATION | PUNCT_IN_WORD,  // Armenian exclamation
+  CLAUSE_QUESTION | PUNCT_IN_WORD,  // Armenian question
+  CLAUSE_PERIOD | PUNCT_IN_WORD,  // Armenian emphasis mark
+
+  CLAUSE_COMMA,      // Arabic ,
+  CLAUSE_SEMICOLON,  // Arabic ;
+  CLAUSE_QUESTION,   // Arabic question mark
+  CLAUSE_PERIOD,     // Arabic full stop
+
+  CLAUSE_PERIOD+0x8000,     // Singhalese period
+  CLAUSE_PERIOD+0x8000,     // Tibet period
+  CLAUSE_PARAGRAPH,
 
   CLAUSE_PERIOD,     // Ethiopic period
   CLAUSE_COMMA,      // Ethiopic comma
@@ -131,7 +153,8 @@ static const unsigned int punct_attributes [] = { 0,
   CLAUSE_COLON,      // Ethiopic colon
   CLAUSE_COLON,      // Ethiopic preface colon
   CLAUSE_QUESTION,   // Ethiopic question mark
-  CLAUSE_PERIOD,     // Ethiopic paragraph
+  CLAUSE_PARAGRAPH,     // Ethiopic paragraph
+  CLAUSE_PARAGRAPH,     // Georgian paragraph
 
   CLAUSE_COMMA+0x8000,      // ideograph comma
   CLAUSE_PERIOD+0x8000,     // ideograph period
@@ -151,7 +174,7 @@ static const unsigned int punct_attributes [] = { 0,
 // frame 0 is for the defaults, before any ssml tags.
 typedef struct {
 	int tag_type;
-	int voice_variant;
+	int voice_variant_number;
 	int voice_gender;
 	int voice_age;
 	char voice_name[40];
@@ -162,6 +185,8 @@ typedef struct {
 static int n_ssml_stack;
 static SSML_STACK ssml_stack[N_SSML_STACK];
 
+static espeak_VOICE base_voice;
+static char base_voice_variant_name[40] = {0};
 static char current_voice_id[40] = {0};
 
 
@@ -170,10 +195,11 @@ static int n_param_stack;
 PARAM_STACK param_stack[N_PARAM_STACK];
 
 static int speech_parameters[N_SPEECH_PARAM];     // current values, from param_stack
+int saved_parameters[N_SPEECH_PARAM];             //Parameters saved on synthesis start
 
 const int param_defaults[N_SPEECH_PARAM] = {
    0,     // silence (internal use)
-  170,    // rate wpm
+  175,    // rate wpm
   100,    // volume
    50,    // pitch
    50,    // range
@@ -190,49 +216,90 @@ const int param_defaults[N_SPEECH_PARAM] = {
 };
 
 
-#ifdef NEED_WCHAR_FUNCTIONS
+// additional Latin characters beyond the ascii character set
+#define MAX_WALPHA  0x24f
+// indexed by character - 0x80
+// 0=not alphabetic, 0xff=lower case, 0xfe=no case, 0xfd=use wchar_tolower
+//   other=value to add to upper case to convert to lower case
+static unsigned char walpha_tab[MAX_WALPHA-0x7f] = {
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 080
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, // 090
+    0,    0,    0,    0,    0,    0,    0,    0,    0,    0, 0xfe,    0,    0,    0,    0,    0, // 0a0
+    0,    0,    0,    0,    0, 0xff,    0,    0,    0,    0, 0xfe,    0,    0,    0,    0,    0, // 0b0
+   32,   32,   32,   32,   32,   32,   32,   32,   32,   32,   32,   32,   32,   32,   32,   32, // 0c0
+   32,   32,   32,   32,   32,   32,   32,    0,   32,   32,   32,   32,   32,   32,   32, 0xff, // 0d0
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0e0
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,    0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0f0
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 100
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 110
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 120
+ 0xfd, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, 0xfe,    1, 0xff,    1, 0xff,    1, 0xff,    1, // 130
+ 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, 0xfe,    1, 0xff,    1, 0xff,    1, 0xff, // 140
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 150
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 160
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, 0xfd,    1, 0xff,    1, 0xff,    1, 0xff, 0xff, // 170
+ 0xff,  210,    1, 0xff,    1, 0xff,  206,    1, 0xff,  205,  205,    1, 0xff, 0xfe,   79,  202, // 180
+  203,    1, 0xff,  205,  207, 0xff,  211,  209,    1, 0xff, 0xff, 0xfe,  211,  213, 0xff,  214, // 190
+    1, 0xff,    1, 0xff,    1, 0xff,  218,    1, 0xff,  218, 0xfe, 0xfe,    1, 0xff,  218,    1, // 1a0
+ 0xff,  217,  217,    1, 0xff,    1, 0xff,  219,    1, 0xff, 0xfe, 0xfe,    1, 0xff, 0xfe, 0xff, // 1b0
+ 0xfe, 0xfe, 0xfe, 0xfe,    2, 0xff, 0xff,    2, 0xff, 0xff,    2, 0xff, 0xff,    1, 0xff,    1, // 1c0
+ 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, 0xff,    1, 0xff, // 1d0
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 1e0
+ 0xfe,    2, 0xff, 0xff,    1, 0xff, 0xfd, 0xfd,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 1f0
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 200
+    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 210
+ 0xfd, 0xfe,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff, // 220
+    1, 0xff,    1, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfd,    1, 0xff, 0xfd, 0xfd, 0xfe, // 230
+ 0xfe,    1, 0xff, 0xfd,   69,   71,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff,    1, 0xff}; // 240
+
+static const short wchar_tolower[] = {
+	0x130, 0x069,
+	0x178, 0x0ff,
+	0x1f6, 0x195,
+	0x1f7, 0x1bf,
+	0x220, 0x19e,
+	0x23a, 0x2c65,
+	0x23d, 0x19a,
+	0x23e, 0x2c66,
+	0x243, 0x180,
+	0,0 };
+
+static const short wchar_toupper[] = {
+	0x0b5, 0x39c,
+	0x0df, 0x0df,
+	0x0ff, 0x178,
+	0x131, 0x049,
+	0x17f, 0x053,
+	0x180, 0x243,
+	0x195, 0x1f6,
+	0x19a, 0x23d,
+	0x19e, 0x220,
+	0x1bf, 0x1f7,
+	0x1c6, 0x1c4,
+	0x1c9, 0x1c7,
+	0x1cc, 0x1ca,
+	0x1dd, 0x18e,
+	0x1f3, 0x1f1,
+	0,0 };
 
-// additional Latin characters beyond the Latin1 character set
-#define MAX_WALPHA  0x233
-// indexed by character - 0x100
-// 0=not alphabetic, 0xff=lower case, other=value to add to upper case to convert to lower case
-static unsigned char walpha_tab[MAX_WALPHA-0xff] = {
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 100
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 110
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 120
-   0xff,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,   1,0xff,   1,0xff,   1,0xff,   1,  // 130
-   0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,   1,0xff,   1,0xff,   1,0xff,  // 140
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 150
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 160
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,  // 170
-   0xff, 210,   1,0xff,   1,0xff, 206,   1,0xff, 205, 205,   1,0xff,0xff,  79, 202,  // 180
-    203,   1,0xff, 205, 207,0xff, 211, 209,   1,0xff,0xff,0xff, 211, 213,0xff, 214,  // 190
-      1,0xff,   1,0xff,   1,0xff, 218,   1,0xff, 218,0xff,0xff,   1,0xff, 218,   1,  // 1a0
-   0xff, 217, 217,   1,0xff,   1,0xff, 219,   1,0xff,0xff,0xff,   1,0xff,0xff,0xff,  // 1b0
-   0xff,0xff,0xff,0xff,   2,   1,0xff,   2,   1,0xff,   2,   1,0xff,   1,0xff,   1,  // 1c0
-   0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,   1,0xff,  // 1d0
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 1e0
-   0xff,   2,   1,0xff,   1,0xff,0xff,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 1f0
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 200
-      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 210
-   0xff,   0,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 220
-      1,0xff,   1,0xff };    // 230
+
+#ifdef NEED_WCHAR_FUNCTIONS
 
 // use ctype.h functions for Latin1 (character < 0x100)
 int iswalpha(int c)
 {
-	if(c < 0x100)
+	if(c < 0x80)
 		return(isalpha(c));
 	if((c > 0x3040) && (c <= 0xa700))
 		return(1);  // japanese, chinese characters
 	if(c > MAX_WALPHA)
 		return(0);
-	return(walpha_tab[c-0x100]);
+	return(walpha_tab[c-0x80]);
 }
 
 int iswdigit(int c)
 {
-	if(c < 0x100)
+	if(c < 0x80)
 		return(isdigit(c));
 	return(0);
 }
@@ -247,44 +314,67 @@ int iswalnum(int c)
 int towlower(int c)
 {
 	int x;
-	if(c < 0x100)
+	int ix;
+
+	if(c < 0x80)
 		return(tolower(c));
-	if((c > MAX_WALPHA) || ((x = walpha_tab[c-0x100])==0xff))
-		return(c);  // already lower case
+
+	if((c > MAX_WALPHA) || ((x = walpha_tab[c-0x80]) >= 0xfe))
+		return(c);
+
+	if(x == 0xfd)
+	{
+		// special cases, lookup translation table
+		for(ix=0; wchar_tolower[ix] != 0; ix+=2)
+		{
+			if(wchar_tolower[ix] == c)
+				return(wchar_tolower[ix+1]);
+		}
+	}
 	return(c + x);  // convert to lower case
 }
 
 int towupper(int c)
 {
-	// check whether the previous character code is the upper-case equivalent of this character
-	if(tolower(c-1) == c)
-		return(c-1);  // yes, use it
+	int ix;
+	// check whether a previous character code is the upper-case equivalent of this character
+	if(towlower(c-32) == c)
+		return(c-32); // yes, use it
+	if(towlower(c-1) == c)
+		return(c-1);
+	for(ix=0; wchar_toupper[ix] != 0; ix+=2)
+	{
+		if(wchar_toupper[ix] == c)
+			return(wchar_toupper[ix+1]);
+	}
 	return(c);  // no
 }
 
 int iswupper(int c)
 {
 	int x;
-	if(c < 0x100)
+	if(c < 0x80)
 		return(isupper(c));
-	if(((c > MAX_WALPHA) || (x = walpha_tab[c-0x100])==0) || (x == 0xff))
+	if(((c > MAX_WALPHA) || (x = walpha_tab[c-0x80])==0) || (x == 0xff))
 		return(0);
 	return(1);
 }
 
 int iswlower(int c)
 {
-	if(c < 0x100)
+	if(c < 0x80)
 		return(islower(c));
-	if((c > MAX_WALPHA) || (walpha_tab[c-0x100] != 0xff))
+	if((c > MAX_WALPHA) || (walpha_tab[c-0x80] != 0xff))
 		return(0);
 	return(1);
 }
 
 int iswspace(int c)
 {
-	if(c < 0x100)
+	if(c < 0x80)
 		return(isspace(c));
+	if(c == 0xa0)
+		return(1);
 	return(0);
 }
 
@@ -336,19 +426,105 @@ float wcstod(const wchar_t *str, wchar_t **tailptr)
 }
 #endif
 
+
+// use internal data for iswalpha up to U+024F
+// iswalpha() on Windows is unreliable  (U+AA, U+BA).
+int iswalpha2(int c)
+{
+	if(c < 0x80)
+		return(isalpha(c));
+	if((c > 0x3040) && (c <= 0xa700))
+		return(1);  // japanese, chinese characters
+	if(c > MAX_WALPHA)
+		return(iswalpha(c));
+	return(walpha_tab[c-0x80]);
+}
+
+int iswlower2(int c)
+{
+	if(c < 0x80)
+		return(islower(c));
+	if(c > MAX_WALPHA)
+		return(iswlower(c));
+	if(walpha_tab[c-0x80] == 0xff)
+		return(1);
+	return(0);
+}
+
+int iswupper2(int c)
+{
+	int x;
+	if(c < 0x80)
+		return(isupper(c));
+	if(c > MAX_WALPHA)
+		return(iswupper(c));
+	if(((x = walpha_tab[c-0x80]) > 0) && (x < 0xfe))
+		return(1);
+	return(0);
+}
+
 int towlower2(unsigned int c)
 {
+	int x;
+	int ix;
+
 	// check for non-standard upper to lower case conversions
 	if(c == 'I')
 	{
-		if(translator->translator_name == L('t','r'))
+		if(translator->langopts.dotless_i)
 		{
 			c = 0x131;   // I -> ı
 		}
 	}
-	return(towlower(c));
+
+	if(c < 0x80)
+		return(tolower(c));
+
+	if(c > MAX_WALPHA)
+		return(towlower(c));
+
+	if((x = walpha_tab[c-0x80]) >= 0xfe)
+		return(c);   // this is not an upper case letter
+
+	if(x == 0xfd)
+	{
+		// special cases, lookup translation table
+		for(ix=0; wchar_tolower[ix] != 0; ix+=2)
+		{
+			if(wchar_tolower[ix] == (int)c)
+				return(wchar_tolower[ix+1]);
+		}
+	}
+	return(c + x);  // convert to lower case
+}
+
+int towupper2(unsigned int c)
+{
+	int ix;
+	if(c > MAX_WALPHA)
+		return(towupper(c));
+
+	// check whether a previous character code is the upper-case equivalent of this character
+	if(towlower2(c-32) == (int)c)
+		return(c-32); // yes, use it
+	if(towlower2(c-1) == (int)c)
+		return(c-1);
+	for(ix=0; wchar_toupper[ix] != 0; ix+=2)
+	{
+		if(wchar_toupper[ix] == (int)c)
+			return(wchar_toupper[ix+1]);
+	}
+	return(c);  // no
 }
 
+static int IsRomanU(unsigned int c)
+{//================================
+	if((c=='I') || (c=='V') || (c=='X') || (c=='L'))
+		return(1);
+	return(0);
+}
+
+
 static void GetC_unget(int c)
 {//==========================
 // This is only called with UTF8 input, not wchar input
@@ -411,7 +587,7 @@ static int GetC_get(void)
 			end_of_input = 1;
 			return(0);
 		}
-	
+
 		if(!end_of_input)
 		{
 			if(option_multibyte == espeakCHARS_16BIT)
@@ -438,10 +614,8 @@ static int GetC(void)
 	int cbuf[4];
 	int ix;
 	int n_bytes;
-	unsigned char m;
 	static int ungot2 = 0;
 	static const unsigned char mask[4] = {0xff,0x1f,0x0f,0x07};
-	static const unsigned char mask2[4] = {0,0x80,0x20,0x30};
 
 	if((c1 = ungot_char) != 0)
 	{
@@ -482,7 +656,6 @@ static int GetC(void)
 		if((ix = n_bytes) > 0)
 		{
 			c = c1 & mask[ix];
-			m = mask2[ix];
 			while(ix > 0)
 			{
 				if((c2 = cbuf[ix] = GetC_get()) == 0)
@@ -501,7 +674,6 @@ static int GetC(void)
 					GetC_unget(c2);
 					break;
 				}
-				m = 0x80;
 				c = (c << 6) + (c2 & 0x3f);
 				ix--;
 			}
@@ -530,8 +702,8 @@ static void UngetC(int c)
 }
 
 
-static const char *WordToString2(unsigned int word)
-{//================================================
+const char *WordToString2(unsigned int word)
+{//============================================
 // Convert a language mnemonic word into a string
 	int  ix;
 	static char buf[5];
@@ -555,20 +727,20 @@ static const char *LookupSpecial(Translator *tr, const char *string, char* text_
 	char phonemes2[55];
 	char *string1 = (char *)string;
 
+	flags[0] = flags[1] = 0;
 	if(LookupDictList(tr,&string1,phonemes,flags,0,NULL))
 	{
-		SetWordStress(tr, phonemes, &flags[0], -1, 0);
+		SetWordStress(tr, phonemes, flags, -1, 0);
 		DecodePhonemes(phonemes,phonemes2);
-		sprintf(text_out,"[[%s]]",phonemes2);
-		option_phoneme_input |= 2;
+		sprintf(text_out,"[\002%s]]",phonemes2);
 		return(text_out);
 	}
 	return(NULL);
 }
 
 
-static const char *LookupCharName(Translator *tr, int c)
-{//=====================================================
+static const char *LookupCharName(Translator *tr, int c, int only)
+{//===============================================================
 // Find the phoneme string (in ascii) to speak the name of character c
 // Used for punctuation characters and symbols
 
@@ -589,20 +761,28 @@ static const char *LookupCharName(Translator *tr, int c)
 	ix = utf8_out(c,&single_letter[2]);
 	single_letter[2+ix]=0;
 
-	string = &single_letter[1];
-	if(LookupDictList(tr, &string, phonemes, flags, 0, NULL) == 0)
+	if(only)
 	{
-		// try _* then *
 		string = &single_letter[2];
+		LookupDictList(tr, &string, phonemes, flags, 0, NULL);
+	}
+	else
+	{
+		string = &single_letter[1];
 		if(LookupDictList(tr, &string, phonemes, flags, 0, NULL) == 0)
 		{
-			// now try the rules
-			single_letter[1] = ' ';
-			TranslateRules(tr, &single_letter[2], phonemes, sizeof(phonemes), NULL,0,NULL);
+			// try _* then *
+			string = &single_letter[2];
+			if(LookupDictList(tr, &string, phonemes, flags, 0, NULL) == 0)
+			{
+				// now try the rules
+				single_letter[1] = ' ';
+				TranslateRules(tr, &single_letter[2], phonemes, sizeof(phonemes), NULL,0,NULL);
+			}
 		}
 	}
 
-	if((phonemes[0] == 0) && (tr->translator_name != L('e','n')))
+	if((only==0) && ((phonemes[0] == 0) || (phonemes[0] == phonSWITCH)) && (tr->translator_name != L('e','n')))
 	{
 		// not found, try English
 		SetTranslator2("en");
@@ -627,23 +807,22 @@ static const char *LookupCharName(Translator *tr, int c)
 	{
 		if(lang_name)
 		{
-			SetWordStress(translator2, phonemes, &flags[0], -1, 0);
+			SetWordStress(translator2, phonemes, flags, -1, 0);
 			DecodePhonemes(phonemes,phonemes2);
-			sprintf(buf,"[[_^_%s %s _^_%s]]","en",phonemes2,WordToString2(tr->translator_name));
+			sprintf(buf,"[\002_^_%s %s _^_%s]]","en",phonemes2,WordToString2(tr->translator_name));
 			SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
 		}
 		else
 		{
-			SetWordStress(tr, phonemes, &flags[0], -1, 0);
+			SetWordStress(tr, phonemes, flags, -1, 0);
 			DecodePhonemes(phonemes,phonemes2);
-			sprintf(buf,"[[%s]] ",phonemes2);
+			sprintf(buf,"[\002%s]] ",phonemes2);
 		}
-		option_phoneme_input |= 2;
 	}
 	else
+	if(only == 0)
 	{
-		strcpy(buf,"[[(X1)(X1)(X1)]]");
-		option_phoneme_input |= 2;
+		strcpy(buf,"[\002(X1)(X1)(X1)]]");
 	}
 
 	return(buf);
@@ -691,12 +870,13 @@ static int LoadSoundFile(const char *fname, int index)
 	}
 
 	f = NULL;
+
+#ifndef _WIN32
 #ifdef PLATFORM_POSIX
 	if((f = fopen(fname,"rb")) != NULL)
 	{
 		int ix;
 		int fd_temp;
-		const char *resample;
 		int header[3];
 		char command[sizeof(fname2)+sizeof(fname2)+40];
 
@@ -710,17 +890,11 @@ static int LoadSoundFile(const char *fname, int index)
 			fclose(f);
 			f = NULL;
 
-			if(header[2] == samplerate)
-				resample = "";
-			else
-				resample = "polyphase";
-
 			strcpy(fname_temp,"/tmp/espeakXXXXXX");
 			if((fd_temp = mkstemp(fname_temp)) >= 0)
 			{
 				close(fd_temp);
-//			sprintf(fname_temp,"%s.wav",tmpnam(NULL));
-				sprintf(command,"sox \"%s\" -r %d -w -s -c1 %s %s\n", fname, samplerate, fname_temp, resample);
+				sprintf(command,"sox \"%s\" -r %d -c1 -t wav %s\n", fname, samplerate, fname_temp);
 				if(system(command) == 0)
 				{
 					fname = fname_temp;
@@ -729,13 +903,14 @@ static int LoadSoundFile(const char *fname, int index)
 		}
 	}
 #endif
+#endif
 
 	if(f == NULL)
 	{
 		f = fopen(fname,"rb");
 		if(f == NULL)
 		{
-			fprintf(stderr,"Can't read temp file: %s\n",fname);
+//			fprintf(stderr,"Can't read temp file: %s\n",fname);
 			return(3);
 		}
 	}
@@ -747,9 +922,9 @@ static int LoadSoundFile(const char *fname, int index)
 		fclose(f);
 		return(4);
 	}
-	fread(p,length,1,f);
+	length = fread(p,1,length,f);
 	fclose(f);
-#if 0
+#ifndef _WIN32
 	remove(fname_temp);
 #endif
 
@@ -810,57 +985,92 @@ static int LoadSoundFile2(const char *fname)
 
 
 
-static int AnnouncePunctuation(Translator *tr, int c1, int c2, char *buf, int bufix)
-{//=================================================================================
+static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output, int *bufix, int end_clause)
+{//=============================================================================================================
 	// announce punctuation names
 	// c1:  the punctuation character
 	// c2:  the following character
 
 	int punct_count;
-	const char *punctname;
-	int found = 0;
+	const char *punctname = NULL;
 	int soundicon;
-	char *p;
+	int attributes;
+	int short_pause;
+	int c2;
+	int len;
+	int bufix1;
+	char buf[200];
+	char buf2[80];
+	char ph_buf[30];
+
+	c2 = *c2_ptr;
+	buf[0] = 0;
 
 	if((soundicon = LookupSoundicon(c1)) >= 0)
 	{
 		// add an embedded command to play the soundicon
-		sprintf(&buf[bufix],"\001%dI ",soundicon);
+		sprintf(buf,"\001%dI ",soundicon);
 		UngetC(c2);
-		found = 1;
 	}
 	else
-	if((punctname = LookupCharName(tr, c1)) != NULL)
 	{
-		found = 1;
-		if(bufix==0)
+		if((c1 == '.') && (end_clause) && (c2 != '.'))
+		{
+			if(LookupSpecial(tr, "_.p", ph_buf))
+			{
+				punctname = ph_buf;  // use word for 'period' instead of 'dot'
+			}
+		}
+		if(punctname == NULL)
+		{
+			punctname = LookupCharName(tr, c1, 0);
+		}
+
+		if(punctname == NULL)
+			return(-1);
+
+		if((*bufix==0) || (end_clause ==0) || (tr->langopts.param[LOPT_ANNOUNCE_PUNCT] & 2))
 		{
 			punct_count=1;
-			while(c2 == c1)
+			while((c2 == c1) && (c1 != '<'))  // don't eat extra '<', it can miss XML tags
 			{
 				punct_count++;
 				c2 = GetC();
 			}
-			UngetC(c2);
+			*c2_ptr = c2;
+			if(end_clause)
+			{
+				UngetC(c2);
+			}
 
-			p = &buf[bufix];
 			if(punct_count==1)
 			{
-				sprintf(p,"%s %s %s",tone_punct_on,punctname,tone_punct_off);
+//				sprintf(buf,"%s %s %s",tone_punct_on,punctname,tone_punct_off);
+				sprintf(buf," %s",punctname);   // we need the space before punctname, to ensure it doesn't merge with the previous word  (eg.  "2.-a")
 			}
 			else
 			if(punct_count < 4)
 			{
-				sprintf(p,"\001+10S%s",tone_punct_on);
+				buf[0] = 0;
+				if(embedded_value[EMBED_S] < 300)
+					sprintf(buf,"\001+10S");  // Speak punctuation name faster, unless we are already speaking fast.  It would upset Sonic SpeedUp
+
 				while(punct_count-- > 0)
-					sprintf(buf,"%s %s",buf,punctname);
-				sprintf(p,"%s %s\001-10S",buf,tone_punct_off);
+				{
+					sprintf(buf2," %s",punctname);
+					strcat(buf, buf2);
+				}
+
+				if(embedded_value[EMBED_S] < 300)
+				{
+					sprintf(buf2," \001-10S");
+					strcat(buf, buf2);
+				}
 			}
 			else
 			{
-				sprintf(p,"%s %s %d %s %s",
-						tone_punct_on,punctname,punct_count,punctname,tone_punct_off);
-				return(CLAUSE_COMMA);
+				sprintf(buf," %s %d %s",
+						punctname,punct_count,punctname);
 			}
 		}
 		else
@@ -873,24 +1083,39 @@ static int AnnouncePunctuation(Translator *tr, int c1, int c2, char *buf, int bu
 					ssml_ignore_l_angle = c1;  // this was &lt; which was converted to <, don't pick it up again as <
 			}
 			ungot_char2 = c1;
-			buf[bufix] = ' ';
-			buf[bufix+1] = 0;
+			buf[0] = ' ';
+			buf[1] = 0;
 		}
 	}
 
-	if(found == 0)
+	bufix1 = *bufix;
+	len = strlen(buf);
+	strcpy(&output[*bufix],buf);
+	*bufix += len;
+
+	if(end_clause==0)
 		return(-1);
 
 	if(c1 == '-')
 		return(CLAUSE_NONE);   // no pause
-	if(bufix > 0)
-		return(CLAUSE_SHORTCOMMA);
-	if((strchr_w(punct_close,c1) != NULL) && !iswalnum(c2))
-		return(CLAUSE_SHORTFALL+4);
-	if(iswspace(c2) && strchr_w(punct_stop,c1)!=NULL)
-		return(punct_attributes[lookupwchar(punct_chars,c1)]);
-	
-	return(CLAUSE_SHORTCOMMA);
+
+	attributes = punct_attributes[lookupwchar(punct_chars,c1)];
+
+	short_pause = CLAUSE_SHORTFALL;
+	if((attributes & CLAUSE_BITS_INTONATION) == 0x1000)
+		short_pause = CLAUSE_SHORTCOMMA;
+
+	if((bufix1 > 0) && !(tr->langopts.param[LOPT_ANNOUNCE_PUNCT] & 2))
+	{
+		if((attributes & ~0x8000) == CLAUSE_SEMICOLON)
+			return(CLAUSE_SHORTFALL);
+		return(short_pause);
+	}
+
+	if(attributes & CLAUSE_BIT_SENTENCE)
+		return(attributes);
+
+	return(short_pause);
 }  //  end of AnnouncePunctuation
 
 #define SSML_SPEAK     1
@@ -908,10 +1133,11 @@ static int AnnouncePunctuation(Translator *tr, int c1, int c2, char *buf, int bu
 #define SSML_BREAK    13
 #define SSML_IGNORE_TEXT 14
 #define HTML_BREAK    15
-#define SSML_CLOSE    0x10   // for a closing tag, OR this with the tag type
+#define HTML_NOSPACE  16    // don't insert a space for this element, so it doesn't break a word
+#define SSML_CLOSE    0x20   // for a closing tag, OR this with the tag type
 
 // these tags have no effect if they are self-closing, eg. <voice />
-static char ignore_if_self_closing[] = {0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0};
+static char ignore_if_self_closing[] = {0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0,0,0};
 
 
 static MNEM_TAB ssmltags[] = {
@@ -932,6 +1158,7 @@ static MNEM_TAB ssmltags[] = {
 
 	{"br", HTML_BREAK},
 	{"li", HTML_BREAK},
+	{"dd", HTML_BREAK},
 	{"img", HTML_BREAK},
 	{"td", HTML_BREAK},
 	{"h1", SSML_PARAGRAPH},
@@ -941,29 +1168,37 @@ static MNEM_TAB ssmltags[] = {
 	{"hr", SSML_PARAGRAPH},
 	{"script", SSML_IGNORE_TEXT},
 	{"style", SSML_IGNORE_TEXT},
+	{"font", HTML_NOSPACE},
+	{"b", HTML_NOSPACE},
+	{"i", HTML_NOSPACE},
+	{"strong", HTML_NOSPACE},
+	{"em", HTML_NOSPACE},
+	{"code", HTML_NOSPACE},
 	{NULL,0}};
 
 
 
 
-static const char *VoiceFromStack()
+static const char *VoiceFromStack(void)
 {//================================
 // Use the voice properties from the SSML stack to choose a voice, and switch
 // to that voice if it's not the current voice
 	int ix;
+	const char *p;
 	SSML_STACK *sp;
 	const char *v_id;
 	int voice_name_specified;
 	int voice_found;
 	espeak_VOICE voice_select;
-	char voice_name[40];
+	static char voice_name[40];
 	char language[40];
+	char buf[80];
 
 	strcpy(voice_name,ssml_stack[0].voice_name);
 	strcpy(language,ssml_stack[0].language);
 	voice_select.age = ssml_stack[0].voice_age;
 	voice_select.gender = ssml_stack[0].voice_gender;
-	voice_select.variant = ssml_stack[0].voice_variant;
+	voice_select.variant = ssml_stack[0].voice_variant_number;
 	voice_select.identifier = NULL;
 
 	for(ix=0; ix<n_ssml_stack; ix++)
@@ -983,15 +1218,32 @@ static const char *VoiceFromStack()
 		if(sp->language[0] != 0)
 		{
 			strcpy(language, sp->language);
+
+			// is this language provided by the base voice?
+			p = base_voice.languages;
+			while(*p++ != 0)
+			{
+				if(strcmp(p, language) == 0)
+				{
+					// yes, change the language to the main language of the base voice
+					strcpy(language, &base_voice.languages[1]);
+					break;
+				}
+				p += (strlen(p) + 1);
+			}
+
 			if(voice_name_specified == 0)
 				voice_name[0] = 0;  // forget a previous voice name if a language is specified
 		}
 		if(sp->voice_gender != 0)
+		{
 			voice_select.gender = sp->voice_gender;
+		}
+
 		if(sp->voice_age != 0)
 			voice_select.age = sp->voice_age;
-		if(sp->voice_variant != 0)
-			voice_select.variant = sp->voice_variant;
+		if(sp->voice_variant_number != 0)
+			voice_select.variant = sp->voice_variant_number;
 	}
 
 	voice_select.name = voice_name;
@@ -999,6 +1251,14 @@ static const char *VoiceFromStack()
 	v_id = SelectVoice(&voice_select, &voice_found);
 	if(v_id == NULL)
 		return("default");
+
+	if((strchr(v_id, '+') == NULL) && ((voice_select.gender == 0) || (voice_select.gender == base_voice.gender)) && (base_voice_variant_name[0] != 0))
+	{
+		// a voice variant has not been selected, use the original voice variant
+		sprintf(buf, "%s+%s", v_id, base_voice_variant_name);
+		strncpy0(voice_name, buf, sizeof(voice_name));
+		return(voice_name);
+	}
 	return(v_id);
 }  // end of VoiceFromStack
 
@@ -1012,7 +1272,7 @@ static void ProcessParamStack(char *outbuf, int *outix)
 	int value;
 	char buf[20];
 	int new_parameters[N_SPEECH_PARAM];
-	static char cmd_letter[N_SPEECH_PARAM] = {0, 'S','A','P','R', 0, 0, 0, 0, 0, 0, 0, 'F'};  // embedded command letters
+	static char cmd_letter[N_SPEECH_PARAM] = {0, 'S','A','P','R', 0, 'C', 0, 0, 0, 0, 0, 'F'};  // embedded command letters
 
 
 	for(param=0; param<N_SPEECH_PARAM; param++)
@@ -1054,7 +1314,7 @@ static void ProcessParamStack(char *outbuf, int *outix)
 
 			speech_parameters[param] = new_parameters[param];
 			strcpy(&outbuf[*outix],buf);
-			(*outix) += strlen(buf);
+			*outix += strlen(buf);
 		}
 	}
 }  // end of ProcessParamStack
@@ -1126,7 +1386,7 @@ static wchar_t *GetSsmlAttribute(wchar_t *pw, const char *name)
 				while(iswspace(*pw)) pw++;
 				if(*pw == '=') pw++;
 				while(iswspace(*pw)) pw++;
-				if(*pw == '"')
+				if((*pw == '"') || (*pw == '\''))  // allow single-quotes ?
 					return(pw+1);
 				else
 					return(empty);
@@ -1141,14 +1401,14 @@ static wchar_t *GetSsmlAttribute(wchar_t *pw, const char *name)
 static int attrcmp(const wchar_t *string1, const char *string2)
 {//============================================================
 	int  ix;
-	
+
 	if(string1 == NULL)
 		return(1);
 
 	for(ix=0; (string1[ix] == string2[ix]) && (string1[ix] != 0); ix++)
 	{
 	}
-	if((string1[ix]=='"') && (string2[ix]==0))
+	if(((string1[ix]=='"') || (string1[ix]=='\'')) && (string2[ix]==0))
 		return(0);
 	return(1);
 }
@@ -1171,10 +1431,10 @@ static int attrnumber(const wchar_t *pw, int default_value, int type)
 {//==================================================================
 	int value = 0;
 
-	if((pw == NULL) || !isdigit(*pw))
+	if((pw == NULL) || !IsDigit09(*pw))
 		return(default_value);
 
-	while(isdigit(*pw))
+	while(IsDigit09(*pw))
 	{
 		value = value*10 + *pw++ - '0';
 	}
@@ -1217,7 +1477,7 @@ static int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
 {//=============================================================================
 	int sign = 0;
 	wchar_t *tail;
-	float value;
+	double value;
 
 	while(iswspace(*pw)) pw++;
 	if(*pw == '+')
@@ -1227,10 +1487,10 @@ static int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
 	}
 	if(*pw == '-')
 	{
-		pw++;	
+		pw++;
 		sign = -1;
 	}
-	value = (float)wcstod(pw,&tail);
+	value = (double)wcstod(pw,&tail);
 	if(tail == pw)
 	{
 		// failed to find a number, return 100%
@@ -1248,16 +1508,23 @@ static int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
 
 	if((tail[0]=='s') && (tail[1]=='t'))
 	{
+#ifdef PLATFORM_RISCOS
+		*value_out = 100;
+#else
 		double x;
 		// convert from semitones to a  frequency percentage
-		x = pow(double(2.0),double((value*sign)/12)) * 100;
+		x = pow((double)2.0,(double)((value*sign)/12)) * 100;
 		*value_out = (int)x;
+#endif
 		return(2);   // percentage
 	}
 
 	if(param_type == espeakRATE)
 	{
-		*value_out = (int)(value * 100);
+		if(sign == 0)
+			*value_out = (int)(value * 100);
+		else
+			*value_out = 100 + (int)(sign * value * 100);
 		return(2);   // percentage
 	}
 
@@ -1266,7 +1533,7 @@ static int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
 }  // end of attr_prosody_value
 
 
-int AddNameData(const char *name, int wide)
+static int AddNameData(const char *name, int wide)
 {//========================================
 // Add the name to the namedata and return its position
 // (Used by the Windows SAPI wrapper)
@@ -1287,11 +1554,12 @@ int AddNameData(const char *name, int wide)
 	if(namedata_ix+len >= n_namedata)
 	{
 		// allocate more space for marker names
-		if((vp = realloc(namedata, namedata_ix+len + 300)) == NULL)
+		if((vp = realloc(namedata, namedata_ix+len + 1000)) == NULL)
 			return(-1);  // failed to allocate, original data is unchanged but ignore this new name
+// !!! Bug?? If the allocated data shifts position, then pointers given to user application will be invalid
 
 		namedata = (char *)vp;
-		n_namedata = namedata_ix+len + 300;
+		n_namedata = namedata_ix+len + 1000;
 	}
 	memcpy(&namedata[ix = namedata_ix],name,len);
 	namedata_ix += len;
@@ -1299,8 +1567,8 @@ int AddNameData(const char *name, int wide)
 }  //  end of AddNameData
 
 
-void SetVoiceStack(espeak_VOICE *v)
-{//================================
+void SetVoiceStack(espeak_VOICE *v, const char *variant_name)
+{//==========================================================
 	SSML_STACK *sp;
 	sp = &ssml_stack[0];
 
@@ -1312,10 +1580,15 @@ void SetVoiceStack(espeak_VOICE *v)
 	if(v->languages != NULL)
 		strcpy(sp->language,v->languages);
 	if(v->name != NULL)
-		strcpy(sp->voice_name,v->name);
-	sp->voice_variant = v->variant;
+		strncpy0(sp->voice_name, v->name, sizeof(sp->voice_name));
+	sp->voice_variant_number = v->variant;
 	sp->voice_age = v->age;
 	sp->voice_gender = v->gender;
+
+	if(memcmp(variant_name, "!v", 2) == 0)
+		variant_name += 3;// strip variant directory name, !v plus PATHSEP
+	strncpy0(base_voice_variant_name, variant_name, sizeof(base_voice_variant_name));
+	memcpy(&base_voice, &current_voice_selected, sizeof(base_voice));
 }
 
 
@@ -1332,6 +1605,7 @@ static int GetVoiceAttributes(wchar_t *pw, int tag_type)
 	wchar_t *name;
 	wchar_t *age;
 	wchar_t *variant;
+	int value;
 	const char *new_voice_id;
 	SSML_STACK *ssml_sp;
 
@@ -1369,15 +1643,17 @@ static int GetVoiceAttributes(wchar_t *pw, int tag_type)
 			age = GetSsmlAttribute(pw,"age");
 			gender = GetSsmlAttribute(pw,"gender");
 		}
-	
+
 		if((tag_type != SSML_VOICE) && (lang==NULL))
 			return(0);  // <s> or <p> without language spec, nothing to do
-	
+
 		ssml_sp = &ssml_stack[n_ssml_stack++];
 
 		attrcopy_utf8(ssml_sp->language,lang,sizeof(ssml_sp->language));
 		attrcopy_utf8(ssml_sp->voice_name,name,sizeof(ssml_sp->voice_name));
-		ssml_sp->voice_variant = attrnumber(variant,1,0)-1;
+		if((value = attrnumber(variant,1,0)) > 0)
+			value--;    // variant='0' and variant='1' the same
+		ssml_sp->voice_variant_number = value;
 		ssml_sp->voice_age = attrnumber(age,0,0);
 		ssml_sp->voice_gender = attrlookup(gender,mnem_gender);
 		ssml_sp->tag_type = tag_type;
@@ -1415,8 +1691,8 @@ static void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp)
 		{"x-slow",60},
 		{"slow",80},
 		{"medium",100},
-		{"fast",120},
-		{"x-fast",150},
+		{"fast",125},
+		{"x-fast",160},
 		{NULL, -1}};
 
 	static const MNEM_TAB mnem_pitch[] = {
@@ -1467,6 +1743,31 @@ static void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp)
 }  // end of SetProsodyParemeter
 
 
+static int ReplaceKeyName(char *outbuf, int index, int *outix)
+{//===========================================================
+// Replace some key-names by single characters, so they can be pronounced in different languages
+	static MNEM_TAB keynames[] = {
+	{"space ",0xe020},
+	{"tab ", 0xe009},
+	{"underscore ", 0xe05f},
+	{"double-quote ", '"'},
+	{NULL, 0}};
+
+	int ix;
+	int letter;
+	char *p;
+
+	p = &outbuf[index];
+
+	if((letter = LookupMnem(keynames, p)) != 0)
+	{
+		ix = utf8_out(letter, p);
+		*outix = index + ix;
+		return(letter);
+	}
+	return(0);
+}
+
 
 static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outbuf, int self_closing)
 {//==================================================================================================
@@ -1483,7 +1784,7 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 	int voice_change_flag;
 	wchar_t *px;
 	wchar_t *attr1;
-	wchar_t *attr2; 
+	wchar_t *attr2;
 	wchar_t *attr3;
 	int terminator;
 	char *uri;
@@ -1532,6 +1833,7 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 		{"reduced",2},
 		{"moderate",3},
 		{"strong",4},
+		{"x-strong",5},
 		{NULL,-1}};
 
 	static const char *prosody_attr[5] = {
@@ -1546,19 +1848,29 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 	tag_name[ix] = 0;
 
 	px = &xml_buf[ix];   // the tag's attributes
-	
+
 	if(tag_name[0] == '/')
 	{
-		tag_type = LookupMnem(ssmltags,&tag_name[1]) + SSML_CLOSE;  // closing tag
+		// closing tag
+		if((tag_type = LookupMnem(ssmltags,&tag_name[1])) != HTML_NOSPACE)
+		{
+			outbuf[(*outix)++] = ' ';
+		}
+		tag_type += SSML_CLOSE;
 	}
 	else
 	{
-		tag_type = LookupMnem(ssmltags,tag_name);
+		if((tag_type = LookupMnem(ssmltags,tag_name)) != HTML_NOSPACE)
+		{
+			// separate SSML tags from the previous word (but not HMTL tags such as <b> <font> which can occur inside a word)
+			outbuf[(*outix)++] = ' ';
+		}
 
 		if(self_closing && ignore_if_self_closing[tag_type])
 			return(0);
 	}
 
+
 	voice_change_flag = 0;
 	terminator = CLAUSE_NONE;
 	ssml_sp = &ssml_stack[n_ssml_stack-1];
@@ -1610,14 +1922,16 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 
 		if(translator->langopts.tone_language == 1)
 		{
-			static unsigned char emphasis_to_pitch_range[] = {50,50,40,70,90,90};
-			static unsigned char emphasis_to_volume[] = {100,100,70,110,140,140};
+			static unsigned char emphasis_to_pitch_range[] = {50,50,40,70,90,100};
+			static unsigned char emphasis_to_volume[] = {100,100,70,110,135,150};
 			// tone language (eg.Chinese) do emphasis by increasing the pitch range.
 			sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value];
 			sp->parameter[espeakVOLUME] = emphasis_to_volume[value];
 		}
 		else
 		{
+			static unsigned char emphasis_to_volume2[] = {100,100,75,100,120,150};
+			sp->parameter[espeakVOLUME] = emphasis_to_volume2[value];
 			sp->parameter[espeakEMPHASIS] = value;
 		}
 		ProcessParamStack(outbuf, outix);
@@ -1650,12 +1964,19 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 
 		sprintf(buf,"%c%dY",CTRL_EMBEDDED,value);
 		strcpy(&outbuf[*outix],buf);
-		(*outix) += strlen(buf);
+		*outix += strlen(buf);
 
+		sayas_start = *outix;
 		sayas_mode = value;   // punctuation doesn't end clause during SAY-AS
 		break;
 
 	case SSML_SAYAS + SSML_CLOSE:
+		if(sayas_mode == SAYAS_KEY)
+		{
+			outbuf[*outix] = 0;
+			ReplaceKeyName(outbuf, sayas_start, outix);
+		}
+
 		outbuf[(*outix)++] = CTRL_EMBEDDED;
 		outbuf[(*outix)++] = 'Y';
 		sayas_mode = 0;
@@ -1666,7 +1987,7 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 		{
 			// use the alias  rather than the text
 			ignore_text = 1;
-			(*outix) += attrcopy_utf8(&outbuf[*outix],attr1,n_outbuf-*outix);
+			*outix += attrcopy_utf8(&outbuf[*outix],attr1,n_outbuf-*outix);
 		}
 		break;
 
@@ -1697,7 +2018,7 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 			{
 				sprintf(buf,"%c%dM",CTRL_EMBEDDED,index);
 				strcpy(&outbuf[*outix],buf);
-				(*outix) += strlen(buf);
+				*outix += strlen(buf);
 			}
 		}
 		break;
@@ -1725,7 +2046,7 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 				{
 					sprintf(buf,"%c%dI",CTRL_EMBEDDED,index);
 					strcpy(&outbuf[*outix],buf);
-					(*outix) += strlen(buf);
+					*outix += strlen(buf);
 					sp->parameter[espeakSILENCE] = 1;
 				}
 			}
@@ -1738,7 +2059,7 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 					{
 						sprintf(buf,"%c%dU",CTRL_EMBEDDED,index);
 						strcpy(&outbuf[*outix],buf);
-						(*outix) += strlen(buf);
+						*outix += strlen(buf);
 						sp->parameter[espeakSILENCE] = 1;
 					}
 				}
@@ -1748,10 +2069,13 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 
 		if(self_closing)
 			PopParamStack(tag_type, outbuf, outix);
+		else
+			audio_text = 1;
 		return(CLAUSE_NONE);
 
 	case SSML_AUDIO + SSML_CLOSE:
 		PopParamStack(tag_type, outbuf, outix);
+		audio_text = 0;
 		return(CLAUSE_NONE);
 
 	case SSML_BREAK:
@@ -1766,14 +2090,20 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 			{
 				// adjust prepause on the following word
 				sprintf(&outbuf[*outix],"%c%dB",CTRL_EMBEDDED,value);
-				(*outix) += 3;
+				*outix += 3;
 				terminator = 0;
 			}
 			value = break_value[value];
 		}
 		if((attr2 = GetSsmlAttribute(px,"time")) != NULL)
 		{
-			value = (attrnumber(attr2,0,1) * 25) / speed.speed_factor1; // compensate for speaking speed to keep constant pause length
+			value2 = attrnumber(attr2,0,1);   // pause in mS
+
+			// compensate for speaking speed to keep constant pause length, see function PauseLength()
+			// 'value' here is x 10mS
+			value = (value2 * 256) / (speed.clause_pause_factor * 10);
+			if(value < 200)
+				value = (value2 * 256) / (speed.pause_factor * 10);
 
 			if(terminator == 0)
 				terminator = CLAUSE_NONE;
@@ -1781,7 +2111,13 @@ static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int *outix, int n_outb
 		if(terminator)
 		{
 			if(value > 0xfff)
-				value = 0xfff;
+			{
+				// scale down the value and set a scaling indicator bit
+				value = value / 32;
+				if(value > 0xfff)
+					value = 0xfff;
+				terminator |= CLAUSE_PAUSE_LONG;
+			}
 			return(terminator + value);
 		}
 		break;
@@ -1873,9 +2209,18 @@ terminator=0;  // ??  Sentence intonation, but no pause ??
 }  // end of ProcessSsmlTag
 
 
+static void RemoveChar(char *p)
+{//=======================
+// Replace a UTF-8 character by spaces
+	int c;
+
+	memset(p, ' ', utf8_in(&c, p));
+}  // end of RemoveChar
+
+
 static MNEM_TAB xml_char_mnemonics[] = {
 	{"gt",'>'},
-	{"lt",'<'},
+	{"lt", 0xe000 + '<'},   // private usage area, to avoid confusion with XML tag
 	{"amp", '&'},
 	{"quot", '"'},
 	{"nbsp", ' '},
@@ -1883,8 +2228,8 @@ static MNEM_TAB xml_char_mnemonics[] = {
 	{NULL,-1}};
 
 
-int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type)
-{//=============================================================================================================
+int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change)
+{//=================================================================================================================================
 /* Find the end of the current clause.
 	Write the clause into  buf
 
@@ -1899,6 +2244,8 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 	int c1=' ';  // current character
 	int c2;  // next character
 	int cprev=' ';  // previous character
+	int cprev2=' ';
+	int c_next;
 	int parag;
 	int ix = 0;
 	int j;
@@ -1911,9 +2258,12 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 	int found;
 	int any_alnum = 0;
 	int self_closing;
-	int punct_data;
+	int punct_data = 0;
+	int is_end_clause;
+	int announced_punctuation = 0;
 	int stressed_word = 0;
-	const char *p;
+	int end_clause_after_tag = 0;
+	int end_clause_index = 0;
 	wchar_t xml_buf[N_XML_BUF+1];
 
 #define N_XML_BUF2   20
@@ -1927,10 +2277,12 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 		clear_skipping_text = 0;
 	}
 
+	tr->phonemes_repeat_count = 0;
 	tr->clause_upper_count = 0;
 	tr->clause_lower_count = 0;
 	end_of_input = 0;
 	*tone_type = 0;
+	*voice_change = 0;
 
 f_input = f_in;  // for GetC etc
 
@@ -1960,7 +2312,7 @@ f_input = f_in;  // for GetC etc
 				return(CLAUSE_EOF);
 			}
 
-			if((skip_characters > 0) && (count_characters > skip_characters))
+			if((skip_characters > 0) && (count_characters >= skip_characters))
 			{
 				// reached the specified start position
 				// don't break a word
@@ -1971,6 +2323,7 @@ f_input = f_in;  // for GetC etc
 			}
 		}
 
+		cprev2 = cprev;
 		cprev = c1;
 		c1 = c2;
 
@@ -2064,8 +2417,19 @@ f_input = f_in;  // for GetC etc
 					c2 = ' ';
 				}
 				else
-				if((c2 == '/') || iswalpha(c2))
+				if((c2 == '/') || iswalpha2(c2))
 				{
+					// check for space in the output buffer for embedded commands produced by the SSML tag
+					if(ix > (n_buf - 20))
+					{
+						// Perhaps not enough room, end the clause before the SSML tag
+						UngetC(c2);
+						ungot_char2 = c1;
+						buf[ix] = ' ';
+						buf[ix+1] = 0;
+						return(CLAUSE_NONE);
+					}
+
 					// SSML Tag
 					n_xml_buf = 0;
 					c1 = c2;
@@ -2076,9 +2440,7 @@ f_input = f_in;  // for GetC etc
 					}
 					xml_buf[n_xml_buf] = 0;
 					c2 = ' ';
-		
-					buf[ix++] = ' ';
-		
+
 					self_closing = 0;
 					if(xml_buf[n_xml_buf-1] == '/')
 					{
@@ -2086,26 +2448,25 @@ f_input = f_in;  // for GetC etc
 						xml_buf[n_xml_buf-1] = ' ';
 						self_closing = 1;
 					}
-		
-					terminator = ProcessSsmlTag(xml_buf,buf,ix,n_buf,self_closing);
-		
+
+					terminator = ProcessSsmlTag(xml_buf,buf,&ix,n_buf,self_closing);
+
 					if(terminator != 0)
 					{
+						if(end_clause_after_tag)
+							ix = end_clause_index;
+
 						buf[ix] = ' ';
 						buf[ix++] = 0;
-		
+
 						if(terminator & CLAUSE_BIT_VOICE)
 						{
-							// a change in voice, write the new voice name to the end of the buf
-							p = current_voice_id;
-							while((*p != 0) && (ix < (n_buf-1)))
-							{
-								buf[ix++] = *p++;
-							}
-							buf[ix++] = 0;
+							strcpy(voice_change, current_voice_id);
 						}
 						return(terminator);
 					}
+					c1 = ' ';
+					c2 = GetC();
 					continue;
 				}
 			}
@@ -2180,6 +2541,16 @@ f_input = f_in;  // for GetC etc
 
 		linelength++;
 
+		if((j = lookupwchar2(tr->chars_ignore,c1)) != 0)
+		{
+			if(j == 1)
+			{
+				// ignore this character (eg. zero-width-non-joiner U+200C)
+				continue;
+			}
+			c1 = j;   // replace the character
+		}
+
 		if(iswalnum(c1))
 			any_alnum = 1;
 		else
@@ -2192,10 +2563,13 @@ f_input = f_in;  // for GetC etc
 				c2 = ' ';
 			}
 
+			if(c1 == 0xf0b)
+				c1 = ' ';    // Tibet inter-syllabic mark, ?? replace by space ??
+
 			if(iswspace(c1))
 			{
 				char *p_word;
-	
+
 				if(tr->translator_name == 0x6a626f)
 				{
 					// language jbo : lojban
@@ -2215,18 +2589,27 @@ f_input = f_in;  // for GetC etc
 					}
 				}
 			}
+
+			if(c1 == 0xd4d)
+			{
+				// Malayalam virama, check if next character is Zero-width-joiner
+				if(c2 == 0x200d)
+				{
+					c1 = 0xd4e;   // use this unofficial code for chillu-virama
+				}
+			}
 		}
 
-		if(iswupper(c1))
+		if(iswupper2(c1))
 		{
 			tr->clause_upper_count++;
-			if((option_capitals == 2) && (sayas_mode == 0) && !iswupper(cprev))
+			if((option_capitals == 2) && (sayas_mode == 0) && !iswupper2(cprev))
 			{
 				char text_buf[40];
 				char text_buf2[30];
 				if(LookupSpecial(tr, "_cap", text_buf2) != NULL)
 				{
-					sprintf(text_buf,"%s%s%s",tone_punct_on,text_buf2,tone_punct_off);
+					sprintf(text_buf,"%s",text_buf2);
 					j = strlen(text_buf);
 					if((ix + j) < n_buf)
 					{
@@ -2237,7 +2620,7 @@ f_input = f_in;  // for GetC etc
 			}
 		}
 		else
-		if(iswalpha(c1))
+		if(iswalpha2(c1))
 			tr->clause_lower_count++;
 
 		if(option_phoneme_input)
@@ -2268,6 +2651,10 @@ f_input = f_in;  // for GetC etc
 				// 2nd newline, assume paragraph
 				UngetC(c2);
 
+				if(end_clause_after_tag)
+				{
+					RemoveChar(&buf[end_clause_index]);  // delete clause-end punctiation
+				}
 				buf[ix] = ' ';
 				buf[ix+1] = 0;
 				if(parag > 3)
@@ -2288,88 +2675,231 @@ if(option_ssml) parag=1;
 			linelength = 0;
 		}
 
-		if(option_punctuation && (phoneme_mode==0) && (sayas_mode==0) && iswpunct(c1))
+		announced_punctuation = 0;
+
+		if((phoneme_mode==0) && (sayas_mode==0))
 		{
-			// option is set to explicitly speak punctuation characters
-			// if a list of allowed punctuation has been set up, check whether the character is in it
-			if((option_punctuation == 1) || (wcschr(option_punctlist,c1) != NULL))
+			is_end_clause = 0;
+
+			if(end_clause_after_tag)
 			{
-				if((terminator = AnnouncePunctuation(tr, c1, c2, buf, ix)) >= 0)
-					return(terminator);
+				// Because of an xml tag, we are waiting for the
+				// next non-blank character to decide whether to end the clause
+				// i.e. is dot followed by an upper-case letter?
+
+				if(!iswspace(c1))
+				{
+					if(!IsAlpha(c1) || !iswlower2(c1))
+//					if(iswdigit(c1) || (IsAlpha(c1) && !iswlower2(c1)))
+					{
+						UngetC(c2);
+						ungot_char2 = c1;
+						buf[end_clause_index] = ' ';  // delete the end-clause punctuation
+						buf[end_clause_index+1] = 0;
+						return(end_clause_after_tag);
+					}
+					end_clause_after_tag = 0;
+				}
 			}
-		}
 
-		if((phoneme_mode==0) && (sayas_mode==0) && ((punct = lookupwchar(punct_chars,c1)) != 0))
-		{
-			punct_data = punct_attributes[punct];
+			if((c1 == '.') && (c2 == '.'))
+			{
+				while((c_next = GetC()) == '.')
+				{
+					// 3 or more dots, replace by elipsis
+					c1 = 0x2026;
+					c2 = ' ';
+				}
+				if(c1 == 0x2026)
+					c2 = c_next;
+				else
+					UngetC(c_next);
+			}
 
-			if(punct_data & PUNCT_IN_WORD)
+			punct_data = 0;
+			if((punct = lookupwchar(punct_chars,c1)) != 0)
 			{
-				// Armenian punctuation inside a word
-				stressed_word = 1;
-				*tone_type = punct_data >> 12 & 0xf;   // override the end-of-sentence type
-				continue;
+				punct_data = punct_attributes[punct];
+
+				if(punct_data & PUNCT_IN_WORD)
+				{
+					// Armenian punctuation inside a word
+					stressed_word = 1;
+					*tone_type = punct_data >> 12 & 0xf;   // override the end-of-sentence type
+					continue;
+				}
+
+				if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || Eof() || (c2 == ctrl_embedded)))    // don't check for '-' because it prevents recognizing ':-)'
+//				if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof()))
+				{
+					// note: (c2='?') is for when a smart-quote has been replaced by '?'
+					is_end_clause = 1;
+				}
 			}
 
-			if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof()))
+			// don't announce punctuation for the alternative text inside inside <audio> ... </audio>
+			if(c1 == 0xe000+'<')  c1 = '<';
+			if(option_punctuation && iswpunct(c1) && (audio_text == 0))
 			{
-				// note: (c2='?') is for when a smart-quote has been replaced by '?'
-				buf[ix] = ' ';
-				buf[ix+1] = 0;
-	
-				if((c1 == '.') && (cprev == '.'))
+				// option is set to explicitly speak punctuation characters
+				// if a list of allowed punctuation has been set up, check whether the character is in it
+				if((option_punctuation == 1) || (wcschr(option_punctlist,c1) != NULL))
 				{
-					c1 = 0x2026;
-					punct = 9;   // elipsis
+					tr->phonemes_repeat_count = 0;
+					if((terminator = AnnouncePunctuation(tr, c1, &c2, buf, &ix, is_end_clause)) >= 0)
+						return(terminator);
+					announced_punctuation = c1;
 				}
-	
+			}
+
+			if((punct_data & PUNCT_SAY_NAME) && (announced_punctuation == 0))
+			{
+				// used for elipsis (and 3 dots) if a pronunciation for elipsis is given in *_list
+				char *p2;
+
+				p2 = &buf[ix];
+				sprintf(p2,"%s",LookupCharName(tr, c1, 1));
+				if(p2[0] != 0)
+				{
+					ix += strlen(p2);
+					announced_punctuation = c1;
+					punct_data = punct_data & ~CLAUSE_BITS_INTONATION;  // change intonation type to 0 (full-stop)
+				}
+			}
+
+			if(is_end_clause)
+			{
 				nl_count = 0;
-				while(!Eof() && iswspace(c2))
+				c_next = c2;
+
+				if(iswspace(c_next))
 				{
-					if(c2 == '\n')
-						nl_count++;
-					c2 = GetC();   // skip past space(s)
+					while(!Eof() && iswspace(c_next))
+					{
+						if(c_next == '\n')
+							nl_count++;
+						c_next = GetC();   // skip past space(s)
+					}
 				}
-				if(!Eof())
+
+				if((c1 == '.') && (nl_count < 2))
 				{
-					UngetC(c2);
+					punct_data |= CLAUSE_DOT;
 				}
-	
-				if((nl_count==0) && (c1 == '.'))
+
+				if(nl_count==0)
 				{
-					if(iswdigit(cprev) && (tr->langopts.numbers & 0x10000) && islower(c2))
+					if((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h','u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower2(c_next))))
+					{
+						// lang=hu, fix for ordinal numbers, eg:  "december 2., szerda", ignore ',' after ordinal number
+						c1 = CHAR_COMMA_BREAK;
+						is_end_clause = 0;
+					}
+
+					if(c1 == '.')
+					{
+						if((tr->langopts.numbers & NUM_ORDINAL_DOT) &&
+							(iswdigit(cprev) || (IsRomanU(cprev) && (IsRomanU(cprev2) || iswspace(cprev2)))))  // lang=hu
+						{
+							// dot after a number indicates an ordinal number
+							if(!iswdigit(cprev))
+							{
+								is_end_clause = 0;  // Roman number followed by dot
+							}
+							else
+							{
+								if (iswlower2(c_next) || (c_next=='-'))     // hyphen is needed for lang-hu (eg. 2.-kal)
+									is_end_clause = 0;      // only if followed by lower-case, (or if there is a XML tag)
+							}
+						}
+						else
+						if(c_next == '\'')
+						{
+							is_end_clause = 0;    // eg. u.s.a.'s
+						}
+						if(iswlower2(c_next))
+						{
+							// next word has no capital letter, this dot is probably from an abbreviation
+//							c1 = ' ';
+							is_end_clause = 0;
+						}
+						if(any_alnum==0)
+						{
+							// no letters or digits yet, so probably not a sentence terminator
+							// Here, dot is followed by space or bracket
+							c1 = ' ';
+							is_end_clause = 0;
+						}
+					}
+					else
 					{
-						// dot after a number indicates an ordinal number
-						c2 = '.';
-						continue;
+						if(any_alnum==0)
+						{
+							// no letters or digits yet, so probably not a sentence terminator
+							is_end_clause = 0;
+						}
 					}
-					if(iswlower(c2))
+
+					if(is_end_clause && (c1 == '.') && (c_next == '<') && option_ssml)
 					{
-						c2 = ' ';
-						continue;  // next word has no capital letter, this dot is probably from an abbreviation
+						// wait until after the end of the xml tag, then look for upper-case letter
+						is_end_clause = 0;
+						end_clause_index = ix;
+						end_clause_after_tag = punct_data;
 					}
-					if(any_alnum==0)
+				}
+
+				if(is_end_clause)
+				{
+					UngetC(c_next);
+					buf[ix] = ' ';
+					buf[ix+1] = 0;
+
+					if(iswdigit(cprev) && !IsAlpha(c_next))   // ????
 					{
-						c2 = ' ';   // no letters or digits yet, so probably not a sentence terminator
-						continue;
+						punct_data &= ~CLAUSE_DOT;
 					}
+					if(nl_count > 1)
+					{
+						if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION))
+							return(punct_data + 35);   // with a longer pause
+						return(CLAUSE_PARAGRAPH);
+					}
+					return(punct_data);   // only recognise punctuation if followed by a blank or bracket/quote
 				}
-	
-				punct_data = punct_attributes[punct];
-				if(nl_count > 1)
+				else
 				{
-					if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION))
-						return(punct_data + 35);   // with a longer pause
-					return(CLAUSE_PARAGRAPH);
+					if(!Eof())
+					{
+						if(iswspace(c2))
+							UngetC(c_next);
+					}
 				}
-				return(punct_data);   // only recognise punctuation if followed by a blank or bracket/quote
 			}
 		}
 
 		if(speech_parameters[espeakSILENCE]==1)
 			continue;
 
+		if(c1 == announced_punctuation)
+		{
+			// This character has already been announced, so delete it so that it isn't spoken a second time.
+			// Unless it's a hyphen or apostrophe (which is used by TranslateClause() )
+			if(IsBracket(c1))
+			{
+				c1 = 0xe000 + '(';   // Unicode private useage area.  So TranslateRules() knows the bracket name has been spoken
+			}
+			else
+			if(c1 != '-')
+			{
+				c1 = ' ';
+			}
+		}
+
 		j = ix+1;
+
+		if(c1 == 0xe000 + '<') c1 = '<';
+
 		ix += utf8_out(c1,&buf[ix]);    //	buf[ix++] = c1;
 		if(!iswspace(c1) && !IsBracket(c1))
 		{
@@ -2379,10 +2909,11 @@ if(option_ssml) parag=1;
 		}
 		*charix_top = ix;
 
-		if(((ix > (n_buf-20)) && !IsAlpha(c1) && !iswdigit(c1))  ||  (ix >= (n_buf-2)))
+		if(((ix > (n_buf-75)) && !IsAlpha(c1) && !iswdigit(c1))  ||  (ix >= (n_buf-4)))
 		{
 			// clause too long, getting near end of buffer, so break here
 			// try to break at a word boundary (unless we actually reach the end of buffer).
+			// (n_buf-4) is to allow for 3 bytes of multibyte character plus terminator.
 			buf[ix] = ' ';
 			buf[ix+1] = 0;
 			UngetC(c2);
@@ -2394,6 +2925,10 @@ if(option_ssml) parag=1;
 	{
 		ix += utf8_out(CHAR_EMPHASIS, &buf[ix]);
 	}
+	if(end_clause_after_tag)
+	{
+		RemoveChar(&buf[end_clause_index]);  // delete clause-end punctiation
+	}
 	buf[ix] = ' ';
 	buf[ix+1] = 0;
 	return(CLAUSE_EOF);   //  end of file
@@ -2417,6 +2952,7 @@ void InitText2(void)
 	int param;
 
 	ungot_char = 0;
+	ungot_char2 = 0;
 
 	n_ssml_stack =1;
 	n_param_stack = 1;
@@ -2431,6 +2967,7 @@ void InitText2(void)
 	current_voice_id[0] = 0;
 
 	ignore_text = 0;
+	audio_text = 0;
 	clear_skipping_text = 0;
 	count_characters = -1;
 	sayas_mode = 0;