Build against Hunspell (http://hunspell.sf.net/) instead of MySpell.

Hunspell will replace MySpell in a future version of OpenOffice.org. It is compatible with MySpell's dictionaries and offers many improvements for non-Western languages. We can no longer build against a system version of MySpell; we will always build against our own bundled copy of Hunspell unless told otherwise. This is bug 9820. git-svn-id: svn+ssh://svn.abisource.com/svnroot/enchant/trunk@21089 bcba8976-2d24-0410-9c9c-aab3bd5fdfd6
author: Dom Lachowicz <domlachowicz@gmail.com> 2006-01-14 02:18:48 +0000
committer: Dom Lachowicz <domlachowicz@gmail.com> 2006-01-14 02:18:48 +0000
commit: 7f5d852c3116af74620e630a776b6a8e03f8e5c9 (patch)
tree: 08856630ce7f546ecafe18d7d68cbc41f13113ef
parent: 48a8a34b95d427464cc9ca8af9fbf2900f1dcf30 (diff)
download: enchant-7f5d852c3116af74620e630a776b6a8e03f8e5c9.tar.gz
21 files changed, 8926 insertions, 1140 deletions
diff --git a/configure.in b/configure.in
index 1cca3b8..f106836 100644
--- a/configure.in
+++ b/configure.in
@@ -123,12 +123,6 @@ if test "x$with_myspell_dir" != "x" ; then
    myspell_dir=$with_myspell_dir
 fi
 
-with_system_myspell=no
-if test "x$build_myspell" != "xno"; then
-   PKG_CHECK_MODULES(MYSPELL, myspell, with_system_myspell=yes, with_system_myspell=no)
-fi
-AM_CONDITIONAL(WITH_SYSTEM_MYSPELL, test "x$with_system_myspell" = "xyes")
-
 MYSPELL_CFLAGS="$MYSPELL_CFLAGS -DENCHANT_MYSPELL_DICT_DIR='\"$myspell_dir\"'"
 if test "x$with_system_myspell" != "xno"; then
    MYSPELL_CFLAGS="$MYSPELL_CFLAGS -DWITH_SYSTEM_MYSPELL=1"
@@ -273,7 +267,6 @@ $PACKAGE-$VERSION
 	Build Ispell backend:           ${build_ispell}
 	Build Uspell backend:           ${build_uspell}
 	Build Hspell backend:           ${build_hspell}
-	Build Myspell backend:          ${build_myspell}
-	Build against system Myspell:   ${with_system_myspell}
+	Build Myspell/Hunspell backend: ${build_myspell}
 	Build with Binreloc		$br_cv_binreloc
 "
diff --git a/src/myspell/Makefile.am b/src/myspell/Makefile.am
index 1f84195..7a57c3d 100644
--- a/src/myspell/Makefile.am
+++ b/src/myspell/Makefile.am
@@ -13,43 +13,33 @@ libenchant_myspell_lalibdir=$(libdir)/enchant
 libenchant_myspell_la_LIBADD= $(MYSPELL_LIBS) $(ENCHANT_LIBS) $(top_builddir)/src/libenchant.la
 libenchant_myspell_la_LDFLAGS = -version-info $(VERSION_INFO) -no-undefined
 
-if WITH_SYSTEM_MYSPELL
 libenchant_myspell_la_SOURCES =	\
-	myspell_checker.cpp
-else
-libenchant_myspell_la_SOURCES =	\
-	affentry.cxx		\
-	affentry.hxx		\
-	affixmgr.cxx		\
-	affixmgr.hxx		\
-	atypes.hxx		\
-	baseaffix.hxx		\
-	csutil.cxx		\
-	csutil.hxx		\
-	hashmgr.cxx		\
-	hashmgr.hxx		\
-	htypes.hxx		\
-	myspell.cxx		\
-	enchant_myspell.hxx	\
-	suggestmgr.cxx		\
-	suggestmgr.hxx		\
-	myspell_checker.cpp
-endif
-
-EXTRA_DIST=			\
-	license.readme		\
 	affentry.hxx		\
 	affixmgr.hxx		\
 	atypes.hxx		\
 	baseaffix.hxx		\
 	csutil.hxx		\
+	dictmgr.hxx		\
 	hashmgr.hxx		\
 	htypes.hxx		\
-	enchant_myspell.hxx	\
+	hunspell.hxx		\
+	langnum.hxx		\
 	suggestmgr.hxx		\
 	affentry.cxx		\
 	affixmgr.cxx		\
 	csutil.cxx		\
+	dictmgr.cxx		\
 	hashmgr.cxx		\
-	myspell.cxx		\
-	suggestmgr.cxx
+	hunspell.cxx		\
+	suggestmgr.cxx		\
+	myspell_checker.cpp
+
+EXTRA_DIST=			\
+	license.readme		\
+	utf_info.cxx		\
+	README			\
+	license.hunspell	\
+	license.myspell		\
+	license.readme		\
+	hunspell.dsp
+
diff --git a/src/myspell/affentry.cxx b/src/myspell/affentry.cxx
index 603616d..014e925 100644
--- a/src/myspell/affentry.cxx
+++ b/src/myspell/affentry.cxx
@@ -1,5 +1,5 @@
-#include "license.readme"
-
+#include "license.hunspell"
+#include "license.myspell"
 
 #include <cctype>
 #include <cstring>
@@ -7,13 +7,12 @@
 #include <cstdio>
 
 #include "affentry.hxx"
+#include "csutil.hxx"
 
-#ifndef WINDOWS
+#ifndef W32
 using namespace std;
 #endif
 
-extern char * mystrdup(const char * s);
-extern char *  myrevstrdup(const char * s);
 
 PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
 {
@@ -21,73 +20,216 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
   pmyMgr = pmgr;
 
   // set up its intial values
-  achar = dp->achar;         // char flag 
+ 
+  aflag = dp->aflag;         // flag 
   strip = dp->strip;         // string to strip
   appnd = dp->appnd;         // string to append
   stripl = dp->stripl;       // length of strip string
   appndl = dp->appndl;       // length of append string
   numconds = dp->numconds;   // number of conditions to match
-  xpflg = dp->xpflg;         // cross product flag
+  opts = dp->opts;         // cross product flag
   // then copy over all of the conditions
-  memcpy(&conds[0],&dp->conds[0],SETSIZE*sizeof(conds[0]));
+  memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
   next = NULL;
   nextne = NULL;
   nexteq = NULL;
+  morphcode = dp->morphcode;
+  contclass = dp->contclass;
+  contclasslen = dp->contclasslen;
 }
 
 
 PfxEntry::~PfxEntry()
 {
-    achar = '\0';
+    aflag = 0;
     if (appnd) free(appnd);
-    if (strip)free(strip);
+    if (strip) free(strip);
     pmyMgr = NULL;
     appnd = NULL;
-    strip = NULL;    
+    strip = NULL;
+    if (opts & aeUTF8) {
+        for (int i = 0; i < 8; i++) {
+            if (conds.utf8.wchars[i]) free(conds.utf8.wchars[i]);
+        }
+    }
+    if (morphcode && !(opts & aeALIASM)) free(morphcode);
+    if (contclass && !(opts & aeALIASF)) free(contclass);
 }
 
-
-
 // add prefix to this word assuming conditions hold
 char * PfxEntry::add(const char * word, int len)
 {
-    int			cond;
-    char	        tword[MAXWORDLEN+1];
+    char tword[MAXWORDUTF8LEN + 4];
 
-     /* make sure all conditions match */
-     if ((len > stripl) && (len >= numconds)) {
-            unsigned char * cp = (unsigned char *) word;
-            for (cond = 0;  cond < numconds;  cond++) {
-	       if ((conds[*cp++] & (1 << cond)) == 0)
-	          break;
-            }
-            if (cond >= numconds) {
-	      /* we have a match so add prefix */
-              int tlen = 0;
+    if ((len > stripl) && (len >= numconds) && test_condition(word) &&
+       (!stripl || (strncmp(word, strip, stripl) == 0)) && 
+       ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
+    /* we have a match so add prefix */
+              char * pp = tword;
               if (appndl) {
-	          strcpy(tword,appnd);
-                  tlen += appndl;
-               } 
-               char * pp = tword + tlen;
+                  strcpy(tword,appnd);
+                  pp += appndl;
+               }
                strcpy(pp, (word + stripl));
                return mystrdup(tword);
-	    }
      }
      return NULL;    
 }
 
 
+inline int PfxEntry::test_condition(const char * st)
+{
+    int cond;
+    unsigned char * cp = (unsigned char *)st;
+    if (!(opts & aeUTF8)) { // 256-character codepage
+        for (cond = 0;  cond < numconds;  cond++) {
+	    if ((conds.base[*cp++] & (1 << cond)) == 0) return 0;
+        }
+    } else { // UTF-8 encoding
+      unsigned short wc;
+      for (cond = 0;  cond < numconds;  cond++) {
+        // a simple 7-bit ASCII character in UTF-8
+        if ((*cp >> 7) == 0) {
+            // also check limit (end of word)
+	    if ((!*cp) || ((conds.utf8.ascii[*cp++] & (1 << cond)) == 0)) return 0;
+        // UTF-8 multibyte character
+        } else {
+            // not dot wildcard in rule
+            if (!conds.utf8.all[cond]) {
+                if (conds.utf8.neg[cond]) {
+                    u8_u16((w_char *) &wc, 1, (char *) cp);
+                    if (conds.utf8.wchars[cond] && 
+                        flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
+                            wc, (short) conds.utf8.wlen[cond])) return 0;
+                } else {
+                    if (!conds.utf8.wchars[cond]) return 0;
+                    u8_u16((w_char *) &wc, 1, (char *) cp);
+                    if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
+                         wc, (short)conds.utf8.wlen[cond])) return 0;
+                }
+            }
+            // jump to next UTF-8 character
+            for(cp++; (*cp & 0xc0) == 0x80; cp++);
+        }
+      }
+    }
+    return 1;
+}
 
 
 // check if this prefix entry matches 
-struct hentry * PfxEntry::check(const char * word, int len)
+struct hentry * PfxEntry::check(const char * word, int len, char in_compound, const FLAG needflag)
 {
-    int			cond;	// condition number being examined
     int	                tmpl;   // length of tmpword
     struct hentry *     he;     // hash entry of root word or NULL
-    unsigned char *	cp;		
-    char	        tmpword[MAXWORDLEN+1];
+    char	        tmpword[MAXWORDUTF8LEN + 4];
+
+    // on entry prefix is 0 length or already matches the beginning of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+     tmpl = len - appndl;
+
+     if ((tmpl > 0) &&  (tmpl + stripl >= numconds)) {
+
+	    // generate new root word by removing prefix and adding
+	    // back any characters that would have been stripped
+
+	    if (stripl) strcpy (tmpword, strip);
+	    strcpy ((tmpword + stripl), (word + appndl));
+
+            // now make sure all of the conditions on characters
+            // are met.  Please see the appendix at the end of
+            // this file for more info on exactly what is being
+            // tested
+
+            // if all conditions are met then check if resulting
+            // root word in the dictionary
+
+            if (test_condition(tmpword)) {
+		tmpl += stripl;
+		if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+                   do {
+		      if (TESTAFF(he->astr, aflag, he->alen) &&
+                        // forbid single prefixes with pseudoroot flag
+                        ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+                        // needflag
+                        ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
+                         (contclass && TESTAFF(contclass, needflag, contclasslen))))
+                            return he;
+                   } while ((he = he->next_homonym)); // check homonyms
+		}
+                
+		// prefix matched but no root word was found 
+                // if aeXPRODUCT is allowed, try again but now 
+                // cross checked combined with a suffix
+
+		//if ((opts & aeXPRODUCT) && in_compound) {
+		if ((opts & aeXPRODUCT)) {
+		   he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL, 
+                        0, NULL, FLAG_NULL, needflag, in_compound);
+                   if (he) return he;
+		}
+	    }
+     }
+    return NULL;
+}
+
+// check if this prefix entry matches 
+struct hentry * PfxEntry::check_twosfx(const char * word, int len,
+    char in_compound, const FLAG needflag)
+{
+    int	                tmpl;   // length of tmpword
+    struct hentry *     he;     // hash entry of root word or NULL
+    char	        tmpword[MAXWORDUTF8LEN + 4];
+
+    // on entry prefix is 0 length or already matches the beginning of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+     tmpl = len - appndl;
 
+     if ((tmpl > 0) &&  (tmpl + stripl >= numconds)) {
+
+	    // generate new root word by removing prefix and adding
+	    // back any characters that would have been stripped
+
+	    if (stripl) strcpy (tmpword, strip);
+	    strcpy ((tmpword + stripl), (word + appndl));
+
+            // now make sure all of the conditions on characters
+            // are met.  Please see the appendix at the end of
+            // this file for more info on exactly what is being
+            // tested
+
+            // if all conditions are met then check if resulting
+            // root word in the dictionary
+
+	    if (test_condition(tmpword)) {
+		tmpl += stripl;
+
+		// prefix matched but no root word was found 
+                // if aeXPRODUCT is allowed, try again but now 
+                // cross checked combined with a suffix
+
+		if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+		   he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, needflag);
+                   if (he) return he;
+		}
+	    }
+     }
+    return NULL;
+}
+
+
+// check if this prefix entry matches 
+char * PfxEntry::check_twosfx_morph(const char * word, int len,
+         char in_compound, const FLAG needflag)
+{
+    int	                tmpl;   // length of tmpword
+    char	        tmpword[MAXWORDUTF8LEN + 4];
 
     // on entry prefix is 0 length or already matches the beginning of the word.
     // So if the remaining root word has positive length
@@ -109,117 +251,317 @@ struct hentry * PfxEntry::check(const char * word, int len)
             // this file for more info on exactly what is being
             // tested
 
-	    cp = (unsigned char *)tmpword;
-	    for (cond = 0;  cond < numconds;  cond++) {
-		if ((conds[*cp++] & (1 << cond)) == 0) break;
+            // if all conditions are met then check if resulting
+            // root word in the dictionary
+
+	    if (test_condition(tmpword)) {
+		tmpl += stripl;
+
+		// prefix matched but no root word was found 
+                // if aeXPRODUCT is allowed, try again but now 
+                // cross checked combined with a suffix
+
+		if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+		    return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl,
+			     aeXPRODUCT, (AffEntry *)this, needflag);
+		}
 	    }
+     }
+    return NULL;
+}
+
+// check if this prefix entry matches 
+char * PfxEntry::check_morph(const char * word, int len, char in_compound, const FLAG needflag)
+{
+    int	                tmpl;   // length of tmpword
+    struct hentry *     he;     // hash entry of root word or NULL
+    char	        tmpword[MAXWORDUTF8LEN + 4];
+    char                result[MAXLNLEN];
+    char * st;
+    
+    *result = '\0';
+
+    // on entry prefix is 0 length or already matches the beginning of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+     tmpl = len - appndl;
+
+     if ((tmpl > 0) &&  (tmpl + stripl >= numconds)) {
+
+	    // generate new root word by removing prefix and adding
+	    // back any characters that would have been stripped
+
+	    if (stripl) strcpy (tmpword, strip);
+	    strcpy ((tmpword + stripl), (word + appndl));
+
+            // now make sure all of the conditions on characters
+            // are met.  Please see the appendix at the end of
+            // this file for more info on exactly what is being
+            // tested
 
             // if all conditions are met then check if resulting
             // root word in the dictionary
 
-	    if (cond >= numconds) {
+	    if (test_condition(tmpword)) {
 		tmpl += stripl;
 		if ((he = pmyMgr->lookup(tmpword)) != NULL) {
-		   if (TESTAFF(he->astr, achar, he->alen)) return he;
+                    do {
+		      if (TESTAFF(he->astr, aflag, he->alen) &&
+                        // forbid single prefixes with pseudoroot flag
+                        ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+                        // needflag
+                        ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
+                         (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
+                            if (morphcode) strcat(result, morphcode); else strcat(result,getKey());
+                            if (he->description) {
+                                if ((*(he->description)=='[')||(*(he->description)=='<')) strcat(result,he->word);
+                                strcat(result,he->description);
+                            }
+                            strcat(result, "\n");
+                      }
+                    } while ((he = he->next_homonym));
 		}
 
 		// prefix matched but no root word was found 
-                // if XPRODUCT is allowed, try again but now 
+                // if aeXPRODUCT is allowed, try again but now 
                 // ross checked combined with a suffix
 
-		if (xpflg & XPRODUCT) {
-		   he = pmyMgr->suffix_check(tmpword, tmpl, XPRODUCT, (AffEntry *)this);
-                   if (he) return he;
+		if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+		   st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, 
+                     FLAG_NULL, needflag);
+                   if (st) {
+                        strcat(result, st);
+                        free(st);
+                   }
 		}
 	    }
      }
+     
+    if (*result) return mystrdup(result);
     return NULL;
 }
 
 
-
 SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
 {
   // register affix manager
   pmyMgr = pmgr;
 
   // set up its intial values
-  achar = dp->achar;         // char flag 
+  aflag = dp->aflag;         // char flag 
   strip = dp->strip;         // string to strip
   appnd = dp->appnd;         // string to append
   stripl = dp->stripl;       // length of strip string
   appndl = dp->appndl;       // length of append string
   numconds = dp->numconds;   // number of conditions to match
-  xpflg = dp->xpflg;         // cross product flag
+  opts = dp->opts;         // cross product flag
 
   // then copy over all of the conditions
-  memcpy(&conds[0],&dp->conds[0],SETSIZE*sizeof(conds[0]));
+  memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
 
   rappnd = myrevstrdup(appnd);
+
+  morphcode = dp->morphcode;
+  contclass = dp->contclass;
+  contclasslen = dp->contclasslen;
 }
 
 
 SfxEntry::~SfxEntry()
 {
-    achar = '\0';
+    aflag = 0;
     if (appnd) free(appnd);
     if (rappnd) free(rappnd);
     if (strip) free(strip);
     pmyMgr = NULL;
     appnd = NULL;
     strip = NULL;    
+    if (opts & aeUTF8) {
+        for (int i = 0; i < 8; i++) {
+            if (conds.utf8.wchars[i]) free(conds.utf8.wchars[i]);  
+        }
+    }
+    if (morphcode && !(opts & aeALIASM)) free(morphcode);
+    if (contclass && !(opts & aeALIASF)) free(contclass);
 }
 
-
-
 // add suffix to this word assuming conditions hold
 char * SfxEntry::add(const char * word, int len)
 {
-    int			cond;
-    char	        tword[MAXWORDLEN+1];
+    char	        tword[MAXWORDUTF8LEN + 4];
 
      /* make sure all conditions match */
-     if ((len > stripl) && (len >= numconds)) {
-            unsigned char * cp = (unsigned char *) (word + len);
-            for (cond = numconds; --cond >=0; ) {
-	       if ((conds[*--cp] & (1 << cond)) == 0)
-	          break;
-            }
-            if (cond < 0) {
+     if ((len > stripl) && (len >= numconds) && test_condition(word + len, word) &&
+        (!stripl || (strcmp(word + len - stripl, strip) == 0)) &&
+        ((MAXWORDUTF8LEN + 4) > (len + appndl - stripl))) {
 	      /* we have a match so add suffix */
               strcpy(tword,word);
-              int tlen = len;
-              if (stripl) {
-		 tlen -= stripl;
-              }
-              char * pp = (tword + tlen);
               if (appndl) {
-	          strcpy(pp,appnd);
-                  tlen += appndl;
-	      } else *pp = '\0';
-               return mystrdup(tword);
-	    }
+                  strcpy(tword + len - stripl, appnd);
+              } else {
+                  *(tword + len - stripl) = '\0';
+              }
+              return mystrdup(tword);
      }
      return NULL;
 }
 
 
+inline int SfxEntry::test_condition(const char * st, const char * beg)
+{
+    int cond;
+    unsigned char * cp = (unsigned char *) st;
+    if (!(opts & aeUTF8)) { // 256-character codepage
+        // Dömölki affix algorithm
+	for (cond = numconds;  --cond >= 0; ) {
+	    if ((conds.base[*--cp] & (1 << cond)) == 0) return 0;
+	}
+    } else { // UTF-8 encoding
+      unsigned short wc;
+      for (cond = numconds;  --cond >= 0; ) {
+        // go to next character position and check limit
+        if ((char *) --cp < beg) return 0;
+        // a simple 7-bit ASCII character in UTF-8
+        if ((*cp >> 7) == 0) {
+	    if ((conds.utf8.ascii[*cp] & (1 << cond)) == 0) return 0;
+        // UTF-8 multibyte character
+        } else {
+            // go to first character of UTF-8 multibyte character
+            for (; (*cp & 0xc0) == 0x80; cp--);
+            // not dot wildcard in rule
+            if (!conds.utf8.all[cond]) {
+                if (conds.utf8.neg[cond]) {
+                    u8_u16((w_char *) &wc, 1, (char *) cp);
+                    if (conds.utf8.wchars[cond] && 
+                        flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
+                            wc, (short) conds.utf8.wlen[cond])) return 0;
+                } else {
+                    if (!conds.utf8.wchars[cond]) return 0;
+                    u8_u16((w_char *) &wc, 1, (char *) cp);
+                    if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
+                         wc, (short)conds.utf8.wlen[cond])) return 0;
+                }
+            }
+        }
+      }
+    }
+    return 1;
+}
+
+
 
 // see if this suffix is present in the word 
-struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEntry* ppfx)
+struct hentry * SfxEntry::check(const char * word, int len, int optflags,
+    AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag)
+{
+    int	                tmpl;		 // length of tmpword 
+    struct hentry *     he;              // hash entry pointer
+    unsigned char *	cp;
+    char	        tmpword[MAXWORDUTF8LEN + 4];
+    PfxEntry* ep = (PfxEntry *) ppfx;
+
+    // if this suffix is being cross checked with a prefix
+    // but it does not support cross products skip it
+
+    if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
+        return NULL;
+
+    // upon entry suffix is 0 length or already matches the end of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+    // the second condition is not enough for UTF-8 strings
+    // it checked in test_condition()
+    
+    if ((tmpl > 0)  &&  (tmpl + stripl >= numconds)) {
+
+	    // generate new root word by removing suffix and adding
+	    // back any characters that would have been stripped or
+	    // or null terminating the shorter string
+
+	    strcpy (tmpword, word);
+	    cp = (unsigned char *)(tmpword + tmpl);
+	    if (stripl) {
+		strcpy ((char *)cp, strip);
+		tmpl += stripl;
+		cp = (unsigned char *)(tmpword + tmpl);
+	    } else *cp = '\0';
+
+            // now make sure all of the conditions on characters
+            // are met.  Please see the appendix at the end of
+            // this file for more info on exactly what is being tested
+
+            // if all conditions are met then check if resulting
+            // root word in the dictionary
+
+	    if (test_condition((char *) cp, (char *) tmpword)) {
+
+#ifdef SZOSZABLYA_POSSIBLE_ROOTS
+		fprintf(stdout,"%s %s %c\n", word, tmpword, aflag);
+#endif
+	        if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+                    do {
+                        // check conditional suffix (enabled by prefix)
+                        if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
+                                    TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && 
+                            (((optflags & aeXPRODUCT) == 0) || 
+                            TESTAFF(he->astr, ep->getFlag(), he->alen) ||
+                             // enabled by prefix
+                            ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+                            ) &&
+                            // handle cont. class
+                            ((!cclass) || 
+                                ((contclass) && TESTAFF(contclass, cclass, contclasslen))
+                            ) &&
+                            // handle required flag
+                            ((!needflag) || 
+                              (TESTAFF(he->astr, needflag, he->alen) ||
+                              ((contclass) && TESTAFF(contclass, needflag, contclasslen)))
+                            )
+                        ) return he;
+                    } while ((he = he->next_homonym)); // check homonyms
+
+                // obsolete stemming code (used only by the 
+                // experimental SuffixMgr:suggest_pos_stems)
+	        // store resulting root in wlst
+		} else if (wlst && (*ns < maxSug)) {
+		    int cwrd = 1;
+        	    for (int k=0; k < *ns; k++) 
+			if (strcmp(tmpword, wlst[k]) == 0) cwrd = 0;
+        	    if (cwrd) {
+			wlst[*ns] = mystrdup(tmpword);
+			if (wlst[*ns] == NULL) {
+			    for (int j=0; j<*ns; j++) free(wlst[j]);
+			    *ns = -1;
+			    return NULL;
+			}
+			(*ns)++;
+		    }
+		}
+	    }
+    }
+    return NULL;
+}
+
+// see if two-level suffix is present in the word 
+struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
+    AffEntry* ppfx, const FLAG needflag)
 {
     int	                tmpl;		 // length of tmpword 
-    int			cond;		 // condition beng examined
     struct hentry *     he;              // hash entry pointer
     unsigned char *	cp;
-    char	        tmpword[MAXWORDLEN+1];
+    char	        tmpword[MAXWORDUTF8LEN + 4];
     PfxEntry* ep = (PfxEntry *) ppfx;
 
 
     // if this suffix is being cross checked with a prefix
     // but it does not support cross products skip it
 
-    if ((optflags & XPRODUCT) != 0 &&  (xpflg & XPRODUCT) == 0)
+    if ((optflags & aeXPRODUCT) != 0 &&  (opts & aeXPRODUCT) == 0)
         return NULL;
 
     // upon entry suffix is 0 length or already matches the end of the word.
@@ -248,25 +590,135 @@ struct hentry * SfxEntry::check(const char * word, int len, int optflags, AffEnt
             // this file for more info on exactly what is being
             // tested
 
-	    for (cond = numconds;  --cond >= 0; ) {
-		if ((conds[*--cp] & (1 << cond)) == 0) break;
-	    }
+            // if all conditions are met then recall suffix_check
+
+	    if (test_condition((char *) cp, (char *) tmpword)) {
+                if (ppfx) {
+                    // handle conditional suffix
+                    if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) 
+                        he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
+                    else
+                        he = pmyMgr->suffix_check(tmpword, tmpl, optflags, ppfx, NULL, 0, NULL, (FLAG) aflag, needflag);
+                } else {
+                    he = pmyMgr->suffix_check(tmpword, tmpl, 0, NULL, NULL, 0, NULL, (FLAG) aflag, needflag);
+                }
+                if (he) return he;
+            }
+    }
+    return NULL;
+}
 
-            // if all conditions are met then check if resulting
-            // root word in the dictionary
 
-	    if (cond < 0) {
-	        if ((he = pmyMgr->lookup(tmpword)) != NULL) {
-                     if (TESTAFF(he->astr, achar , he->alen) && 
-                           ((optflags & XPRODUCT) == 0 || 
-                           TESTAFF(he->astr, ep->getFlag(), he->alen))) return he;
-	        }  
-	    }
+// see if two-level suffix is present in the word 
+char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
+    AffEntry* ppfx, const FLAG needflag)
+{
+    int	                tmpl;		 // length of tmpword 
+    unsigned char *	cp;
+    char	        tmpword[MAXWORDUTF8LEN + 4];
+    PfxEntry* ep = (PfxEntry *) ppfx;
+    char * st;
+
+    char result[MAXLNLEN];
+    
+    *result = '\0';
+
+    // if this suffix is being cross checked with a prefix
+    // but it does not support cross products skip it
+
+    if ((optflags & aeXPRODUCT) != 0 &&  (opts & aeXPRODUCT) == 0)
+        return NULL;
+
+    // upon entry suffix is 0 length or already matches the end of the word.
+    // So if the remaining root word has positive length
+    // and if there are enough chars in root word and added back strip chars
+    // to meet the number of characters conditions, then test it
+
+    tmpl = len - appndl;
+
+    if ((tmpl > 0)  &&  (tmpl + stripl >= numconds)) {
+
+	    // generate new root word by removing suffix and adding
+	    // back any characters that would have been stripped or
+	    // or null terminating the shorter string
+
+	    strcpy (tmpword, word);
+	    cp = (unsigned char *)(tmpword + tmpl);
+	    if (stripl) {
+		strcpy ((char *)cp, strip);
+		tmpl += stripl;
+		cp = (unsigned char *)(tmpword + tmpl);
+	    } else *cp = '\0';
+
+            // now make sure all of the conditions on characters
+            // are met.  Please see the appendix at the end of
+            // this file for more info on exactly what is being
+            // tested
+
+            // if all conditions are met then recall suffix_check
+
+	    if (test_condition((char *) cp, (char *) tmpword)) {
+                if (ppfx) {
+                    // handle conditional suffix
+                    if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
+                        st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
+                        if (st) {
+                            if (((PfxEntry *) ppfx)->getMorph()) {
+                                strcat(result, ((PfxEntry *) ppfx)->getMorph());
+                            }
+                            strcat(result,st);
+                            free(st);
+                            mychomp(result);
+                        }
+                    } else {
+                        st = pmyMgr->suffix_check_morph(tmpword, tmpl, optflags, ppfx, aflag, needflag);
+                        if (st) {
+                            strcat(result, st);
+                            free(st);
+                            mychomp(result);
+                        }
+                    }
+                } else {
+                        st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
+                        if (st) {
+                            strcat(result, st);
+                            free(st);
+                            mychomp(result);
+                        }
+                }
+                if (*result) return mystrdup(result);
+            }
     }
     return NULL;
 }
 
+// get next homonym with same affix
+struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx, 
+    const FLAG cclass, const FLAG needflag)
+{
+    PfxEntry* ep = (PfxEntry *) ppfx;
 
+    while (he->next_homonym) {
+        he = he->next_homonym;
+        if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() && TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && 
+                            ((optflags & aeXPRODUCT) == 0 || 
+                            TESTAFF(he->astr, ep->getFlag(), he->alen) ||
+                             // handle conditional suffix
+                            ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+                            ) &&
+                            // handle cont. class
+                            ((!cclass) || 
+                                ((contclass) && TESTAFF(contclass, cclass, contclasslen))
+                            ) &&
+                            // handle required flag
+                            ((!needflag) || 
+                              (TESTAFF(he->astr, needflag, he->alen) ||
+                              ((contclass) && TESTAFF(contclass, needflag, contclasslen)))
+                            )
+                        ) return he;
+    }
+    return NULL;
+}
 
 
 #if 0
@@ -286,14 +738,14 @@ The structure affentry is defined as follows:
 
 struct affentry
 {
-   unsigned char achar;   // char used to represent the affix
-   char * strip;          // string to strip before adding affix
-   char * appnd;          // the affix string to add
-   short  stripl;         // length of the strip string
-   short  appndl;         // length of the affix string
-   short  numconds;       // the number of conditions that must be met
-   short  xpflg;          // flag: XPRODUCT- combine both prefix and suffix 
-   char   conds[SETSIZE]; // array which encodes the conditions to be met
+   unsigned short aflag;    // ID used to represent the affix
+   char * strip;            // string to strip before adding affix
+   char * appnd;            // the affix string to add
+   unsigned char stripl;    // length of the strip string
+   unsigned char appndl;    // length of the affix string
+   char numconds;           // the number of conditions that must be met
+   char opts;               // flag: aeXPRODUCT- combine both prefix and suffix 
+   char   conds[SETSIZE];   // array which encodes the conditions to be met
 };
 
 
diff --git a/src/myspell/affentry.hxx b/src/myspell/affentry.hxx
index 9c4713c..1dd784a 100644
--- a/src/myspell/affentry.hxx
+++ b/src/myspell/affentry.hxx
@@ -5,7 +5,6 @@
 #include "baseaffix.hxx"
 #include "affixmgr.hxx"
 
-
 /* A Prefix Entry  */
 
 class PfxEntry : public AffEntry
@@ -22,13 +21,29 @@ public:
   PfxEntry(AffixMgr* pmgr, affentry* dp );
   ~PfxEntry();
 
-  struct hentry *      check(const char * word, int len);
+  inline bool          allowCross() { return ((opts & aeXPRODUCT) != 0); }
+  struct hentry *      check(const char * word, int len, char in_compound, 
+                            const FLAG needflag = FLAG_NULL);
+
+  struct hentry *      check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL); // FLAG is not a pointer type, so default to FLAG_NULL like check()
+
+  char *      check_morph(const char * word, int len, char in_compound,
+                            const FLAG needflag = FLAG_NULL);
+
+  char *      check_twosfx_morph(const char * word, int len,
+                  char in_compound, const FLAG needflag = FLAG_NULL);
 
-  inline bool          allowCross() { return ((xpflg & XPRODUCT) != 0); }
-  inline unsigned char getFlag()   { return achar;   }
+  inline FLAG getFlag()   { return aflag;   }
   inline const char *  getKey()    { return appnd;  } 
   char *               add(const char * word, int len);
 
+  inline short getKeyLen() { return appndl; } 
+
+  inline const char *  getMorph()    { return morphcode;  } 
+
+  inline const unsigned short * getCont()    { return contclass;  } 
+  inline short           getContLen()    { return contclasslen;  } 
+
   inline PfxEntry *    getNext()   { return next;   }
   inline PfxEntry *    getNextNE() { return nextne; }
   inline PfxEntry *    getNextEQ() { return nexteq; }
@@ -38,6 +53,8 @@ public:
   inline void   setNextNE(PfxEntry * ptr) { nextne = ptr; }
   inline void   setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
   inline void   setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
+  
+  inline int    test_condition(const char * st);
 };
 
 
@@ -54,23 +71,50 @@ class SfxEntry : public AffEntry
        SfxEntry *   nexteq;
        SfxEntry *   nextne;
        SfxEntry *   flgnxt;
+	   
+       SfxEntry *   l_morph;
+       SfxEntry *   r_morph;
+       SfxEntry *   eq_morph;
 
 public:
 
   SfxEntry(AffixMgr* pmgr, affentry* dp );
   ~SfxEntry();
 
+  inline bool          allowCross() { return ((opts & aeXPRODUCT) != 0); }
   struct hentry *   check(const char * word, int len, int optflags, 
-                                                       AffEntry* ppfx);
+                    AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
+                    const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL);
+
+  struct hentry *   check_twosfx(const char * word, int len, int optflags, AffEntry* ppfx, const FLAG needflag = FLAG_NULL); // FLAG is not a pointer type, so default to FLAG_NULL like check()
 
-  inline bool          allowCross() { return ((xpflg & XPRODUCT) != 0); }
-  inline unsigned char getFlag()   { return achar;   }
+  char *      check_twosfx_morph(const char * word, int len, int optflags,
+                 AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
+  struct hentry * get_next_homonym(struct hentry * he);
+  struct hentry * get_next_homonym(struct hentry * word, int optflags, AffEntry* ppfx, 
+    const FLAG cclass, const FLAG needflag);
+
+
+  inline FLAG getFlag()   { return aflag;   }
   inline const char *  getKey()    { return rappnd; } 
   char *               add(const char * word, int len);
 
+
+  inline const char *  getMorph()    { return morphcode;  } 
+
+  inline const unsigned short * getCont()    { return contclass;  } 
+  inline short           getContLen()    { return contclasslen;  } 
+  inline const char *  getAffix()    { return appnd; } 
+
+  inline short getKeyLen() { return appndl; } 
+
   inline SfxEntry *    getNext()   { return next;   }
   inline SfxEntry *    getNextNE() { return nextne; }
   inline SfxEntry *    getNextEQ() { return nexteq; }
+
+  inline SfxEntry *    getLM() { return l_morph; }
+  inline SfxEntry *    getRM() { return r_morph; }
+  inline SfxEntry *    getEQM() { return eq_morph; }
   inline SfxEntry *    getFlgNxt() { return flgnxt; }
 
   inline void   setNext(SfxEntry * ptr)   { next = ptr;   }
@@ -78,9 +122,9 @@ public:
   inline void   setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
   inline void   setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
 
+  inline int    test_condition(const char * st, const char * begin);
 };
 
-
 #endif
 
 
diff --git a/src/myspell/affixmgr.cxx b/src/myspell/affixmgr.cxx
index 3a5714b..69220e5 100644
--- a/src/myspell/affixmgr.cxx
+++ b/src/myspell/affixmgr.cxx
@@ -1,50 +1,104 @@
-#include "license.readme"
+#include "license.hunspell"
+#include "license.myspell"
 
 #include <cstdlib>
 #include <cstring>
+#include <cctype>
 #include <cstdio>
 
 #include "affixmgr.hxx"
 #include "affentry.hxx"
+#include "langnum.hxx"
 
-#ifndef WINDOWS
+#include "csutil.hxx"
+
+#ifndef W32
 using namespace std;
 #endif
 
-
-// First some base level utility routines
-extern void   mychomp(char * s);
-extern char * mystrdup(const char * s);
-extern char * myrevstrdup(const char * s);
-extern char * mystrsep(char ** sptr, const char delim);
-extern int    isSubset(const char * s1, const char * s2); 
-extern int    isRevSubset(const char * s1, const char * end_of_s2, int len_s2); 
-
-
 AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr) 
 {
   // register hash manager and load affix data from aff file
   pHMgr = ptr;
   trystring = NULL;
   encoding=NULL;
-  reptable = NULL;
-  numrep = 0;
+  utf8 = 0;
+  utf_tbl = NULL;
+  complexprefixes = 0;
   maptable = NULL;
   nummap = 0;
-  compound=NULL;
-  nosplitsugs= (0==1);
-
+  breaktable = NULL;
+  numbreak = 0;
+  reptable = NULL;
+  numrep = 0;
+  checkcpdtable = NULL;
+  numcheckcpd = 0;
+  defcpdtable = NULL;
+  numdefcpd = 0;
+  compoundflag = FLAG_NULL; // permits word in compound forms
+  compoundbegin = FLAG_NULL; // may be first word in compound forms
+  compoundmiddle = FLAG_NULL; // may be middle word in compound forms
+  compoundend = FLAG_NULL; // may be last word in compound forms
+  compoundroot = FLAG_NULL; // compound word signing flag
+  compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
+  compoundforbidflag = FLAG_NULL; // compound forbidden flag for suffixed word
+  checkcompounddup = 0; // forbid double words in compounds
+  checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
+  checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
+  checkcompoundtriple = 0; // forbid compounds with triple letters
+  forbiddenword = FLAG_NULL; // forbidden word signing flag
+  nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
+  lang = NULL; // language
+  langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
+  pseudoroot = FLAG_NULL; // forbidden root, allowed only with suffixes
+  cpdwordmax=0; // default: unlimited wordcount in compound words
   cpdmin = 3;  // default value
+  cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
+  cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
+  cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
+  cpdvowels_utf16_len=0; // vowels
+  pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
+  sfxappnd=NULL; // previous suffix for counting a special syllables BUG
+  cpdsyllablenum=NULL; // syllable count incrementing flag
+  checknum=0; // checking numbers, and word with numbers
+  wordchars=NULL; // letters + spec. word characters
+  wordchars_utf16=NULL; // letters + spec. word characters
+  wordchars_utf16_len=0; // letters + spec. word characters
+  version=NULL; // affix and dictionary file version string
+  havecontclass=0; // flags of possible continuing classes (double affix)
+  // LEMMA_PRESENT: do not put the root into the morphological output; the lemma is already
+  // present in the morphological description in the dictionary file. Often combined with PSEUDOROOT.
+  lemma_present = FLAG_NULL; 
+  circumfix = FLAG_NULL; 
+  onlyincompound = FLAG_NULL; 
+  flag_mode = FLAG_CHAR; // default one-character flags in affix and dic file
+  maxngramsugs = -1; // undefined
+  nosplitsugs = 0;
+  sugswithdots = 0;
+  keepcase = 0;
+  checksharps = 0;
+
+  derived = NULL; // XXX not threadsafe variable for experimental stemming
+  sfx = NULL;
+  pfx = NULL;
+
   for (int i=0; i < SETSIZE; i++) {
      pStart[i] = NULL;
      sStart[i] = NULL;
      pFlag[i] = NULL;
      sFlag[i] = NULL;
   }
+
+  for (int j=0; j < CONTSIZE; j++) {
+    contclasses[j] = 0;
+  }
+  // NOTE(review): csconv is not assigned in this constructor; the visible code sets it only near the end of parse_file(), so it presumably stays unset when parsing fails - confirm
   if (parse_file(affpath)) {
      fprintf(stderr,"Failure loading aff file %s\n",affpath);
      fflush(stderr);
+     if (wordchars) free(wordchars); // parse_file() may have allocated it (presumably from a WORDCHARS line) before failing; avoid leaking it
+     wordchars = mystrdup("qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM"); // fall back to plain Latin letters
   }
 }
 
 
@@ -74,7 +128,8 @@ AffixMgr::~AffixMgr()
             delete(ptr);
             ptr = nptr;
             nptr = NULL;
-       }  
+       }
+       sStart[j] = NULL;
   }
 
   if (trystring) free(trystring);
@@ -83,7 +138,8 @@ AffixMgr::~AffixMgr()
   encoding=NULL;
   if (maptable) {  
      for (int j=0; j < nummap; j++) {
-        free(maptable[j].set);
+        if (maptable[j].set) free(maptable[j].set);
+        if (maptable[j].set_utf16) free(maptable[j].set_utf16);
         maptable[j].set = NULL;
         maptable[j].len = 0;
      }
@@ -91,21 +147,73 @@ AffixMgr::~AffixMgr()
      maptable = NULL;
   }
   nummap = 0;
+  if (breaktable) {
+     for (int j=0; j < numbreak; j++) {
+        if (breaktable[j]) free(breaktable[j]);
+        breaktable[j] = NULL;
+     }
+     free(breaktable);  
+     breaktable = NULL;
+  }
+  numbreak = 0;
   if (reptable) {  
      for (int j=0; j < numrep; j++) {
         free(reptable[j].pattern);
-        free(reptable[j].replacement);
+        free(reptable[j].pattern2);
         reptable[j].pattern = NULL;
-        reptable[j].replacement = NULL;
+        reptable[j].pattern2 = NULL;
      }
      free(reptable);  
      reptable = NULL;
   }
+  if (defcpdtable) {  
+     for (int j=0; j < numdefcpd; j++) {
+        free(defcpdtable[j].def);
+        defcpdtable[j].def = NULL;
+     }
+     free(defcpdtable);  
+     defcpdtable = NULL;
+  }
   numrep = 0;
-  if (compound) free(compound);
-  compound=NULL;
+  if (checkcpdtable) {  
+     for (int j=0; j < numcheckcpd; j++) {
+        free(checkcpdtable[j].pattern);
+        free(checkcpdtable[j].pattern2);
+        checkcpdtable[j].pattern = NULL;
+        checkcpdtable[j].pattern2 = NULL;
+     }
+     free(checkcpdtable);  
+     checkcpdtable = NULL;
+  }
+  numcheckcpd = 0;
+  FREE_FLAG(compoundflag);
+  FREE_FLAG(compoundbegin);
+  FREE_FLAG(compoundmiddle);
+  FREE_FLAG(compoundend);
+  FREE_FLAG(compoundpermitflag);
+  FREE_FLAG(compoundforbidflag);
+  FREE_FLAG(compoundroot);
+  FREE_FLAG(forbiddenword);
+  FREE_FLAG(nosuggest);
+  FREE_FLAG(pseudoroot);
+  FREE_FLAG(lemma_present);
+  FREE_FLAG(circumfix);
+  FREE_FLAG(onlyincompound);
+  
+  cpdwordmax = 0;
   pHMgr = NULL;
   cpdmin = 0;
+  cpdmaxsyllable = 0;
+  if (cpdvowels) free(cpdvowels);
+  if (cpdvowels_utf16) free(cpdvowels_utf16);
+  if (cpdsyllablenum) free(cpdsyllablenum);
+  if (utf_tbl) free(utf_tbl);
+  if (lang) free(lang);
+  if (wordchars) free(wordchars);
+  if (wordchars_utf16) free(wordchars_utf16);
+  if (version) free(version);
+  if (derived) free(derived);
+  checknum=0;
 }
 
 
@@ -118,6 +226,10 @@ int  AffixMgr::parse_file(const char * affpath)
  
   // affix type
   char ft;
+  
+  // checking flag duplication
+  char dupflags[CONTSIZE];
+  char dupflags_ini = 1;
 
   // open the affix file
   FILE * afflst;
@@ -151,16 +263,167 @@ int  AffixMgr::parse_file(const char * affpath)
           }
        }
 
+       /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left writing system */
+       if (strncmp(line,"COMPLEXPREFIXES",15) == 0)
+		   complexprefixes = 1;
+
        /* parse in the flag used by the controlled compound words */
        if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
-          if (parse_cpdflag(line)) {
+          if (parse_flag(line, &compoundflag, "COMPOUNDFLAG")) {
              return 1;
           }
        }
 
-       /* parse in the flag used by the controlled compound words */
+       /* parse in the flag used by compound words */
+       if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
+          if (complexprefixes) {
+            if (parse_flag(line, &compoundend, "COMPOUNDBEGIN")) {
+              return 1;
+            }
+          } else {
+            if (parse_flag(line, &compoundbegin, "COMPOUNDBEGIN")) {
+              return 1;
+            }
+          }
+       }
+
+       /* parse in the flag used by compound words */
+       if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
+          if (parse_flag(line, &compoundmiddle, "COMPOUNDMIDDLE")) {
+             return 1;
+          }
+       }
+       /* parse in the flag used by compound words */
+       if (strncmp(line,"COMPOUNDEND",11) == 0) {
+          if (complexprefixes) {
+            if (parse_flag(line, &compoundbegin, "COMPOUNDEND")) {
+              return 1;
+            }
+          } else {
+            if (parse_flag(line, &compoundend, "COMPOUNDEND")) {
+              return 1;
+            }
+          }
+       }
+
+       /* parse in the flag used by compound_check() method */
+       if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
+          if (parse_num(line, &cpdwordmax, "COMPOUNDWORDMAX")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag sign compounds in dictionary */
+       if (strncmp(line,"COMPOUNDROOT",12) == 0) {
+          if (parse_flag(line, &compoundroot, "COMPOUNDROOT")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by compound_check() method */
+       if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
+          if (parse_flag(line, &compoundpermitflag, "COMPOUNDPERMITFLAG")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by compound_check() method */
+       if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
+          if (parse_flag(line, &compoundforbidflag, "COMPOUNDFORBIDFLAG")) {
+             return 1;
+          }
+       }
+
+       if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0)
+		   checkcompounddup = 1;
+
+       if (strncmp(line,"CHECKCOMPOUNDREP",16) == 0)
+		   checkcompoundrep = 1;
+
+       if (strncmp(line,"CHECKCOMPOUNDTRIPLE",19) == 0)
+		   checkcompoundtriple = 1;
+
+       if (strncmp(line,"CHECKCOMPOUNDCASE",17) == 0)
+		   checkcompoundcase = 1;
+
+       if (strncmp(line,"NOSUGGEST",9) == 0) {
+          if (parse_flag(line, &nosuggest, "NOSUGGEST")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by forbidden words */
+       if (strncmp(line,"FORBIDDENWORD",13) == 0) {
+          if (parse_flag(line, &forbiddenword, "FORBIDDENWORD")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag marking entries whose morphological description already contains the lemma */
+       if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
+          if (parse_flag(line, &lemma_present, "LEMMA_PRESENT")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by circumfixes */
+       if (strncmp(line,"CIRCUMFIX",9) == 0) {
+          if (parse_flag(line, &circumfix, "CIRCUMFIX")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by fogemorphemes */
+       if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
+          if (parse_flag(line, &onlyincompound, "ONLYINCOMPOUND")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by `pseudoroots' */
+       if (strncmp(line,"PSEUDOROOT",10) == 0) {
+          if (parse_flag(line, &pseudoroot, "PSEUDOROOT")) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by `pseudoroots' */
+       if (strncmp(line,"NEEDAFFIX",9) == 0) {
+          if (parse_flag(line, &pseudoroot, "NEEDAFFIX")) {
+             return 1;
+          }
+       }
+
+       /* parse in the minimal length for words in compounds */
        if (strncmp(line,"COMPOUNDMIN",11) == 0) {
-          if (parse_cpdmin(line)) {
+          if (parse_num(line, &cpdmin, "COMPOUNDMIN")) {
+             return 1;
+          }
+          if (cpdmin < 1) cpdmin = 1;
+       }
+
+       /* parse in the max. words and syllables in compounds */
+       if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
+          if (parse_cpdsyllable(line)) {
+             return 1;
+          }
+       }
+
+       /* parse in the flag used by compound_check() method */
+       if (strncmp(line,"SYLLABLENUM",11) == 0) {
+          if (parse_syllablenum(line)) {
+             return 1;
+          }
+       }
+
+       /* enable checking of numbers and of words containing numbers */
+       if (strncmp(line,"CHECKNUM",8) == 0) {
+           checknum=1;
+       }
+
+       /* parse in the extra word characters (WORDCHARS) */
+       if (strncmp(line,"WORDCHARS",9) == 0) {
+          if (parse_wordchars(line)) {
              return 1;
           }
        }
@@ -172,6 +435,20 @@ int  AffixMgr::parse_file(const char * affpath)
           }
        }
 
+       /* parse in the checkcompoundpattern table */
+       if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
+          if (parse_checkcpdtable(line, afflst)) {
+             return 1;
+          }
+       }
+
+       /* parse in the defcompound table */
+       if (strncmp(line,"COMPOUNDRULE",12) == 0) {
+          if (parse_defcpdtable(line, afflst)) {
+             return 1;
+          }
+       }
+
        /* parse in the related character map table */
        if (strncmp(line,"MAP",3) == 0) {
           if (parse_maptable(line, afflst)) {
@@ -179,19 +456,64 @@ int  AffixMgr::parse_file(const char * affpath)
           }
        }
 
-       // parse this affix: P - prefix, S - suffix
-       ft = ' ';
-       if (strncmp(line,"PFX",3) == 0) ft = 'P';
-       if (strncmp(line,"SFX",3) == 0) ft = 'S';
-       if (ft != ' ') {
-          if (parse_affix(line, ft, afflst)) {
+       /* parse in the word breakpoints table */
+       if (strncmp(line,"BREAK",5) == 0) {
+          if (parse_breaktable(line, afflst)) {
+             return 1;
+          }
+       }
+
+       /* parse in the language for language specific codes */
+       if (strncmp(line,"LANG",4) == 0) {
+          if (parse_lang(line)) {
+             return 1;
+          }
+       }
+
+       if (strncmp(line,"VERSION",7) == 0) {
+          if (parse_version(line)) {
+             return 1;
+          }
+       }
+
+       if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
+          if (parse_num(line, &maxngramsugs, "MAXNGRAMSUGS")) {
              return 1;
           }
        }
 
-       // handle NOSPLITSUGS
        if (strncmp(line,"NOSPLITSUGS",11) == 0)
-		   nosplitsugs=(0==0);
+		   nosplitsugs=1;
+
+       if (strncmp(line,"SUGSWITHDOTS",12) == 0)
+		   sugswithdots=1;
+
+       /* parse in the KEEPCASE flag */
+       if (strncmp(line,"KEEPCASE",8) == 0) {
+          if (parse_flag(line, &keepcase, "KEEPCASE")) {
+             return 1;
+          }
+       }
+
+       if (strncmp(line,"CHECKSHARPS",11) == 0)
+		   checksharps=1;
+
+       /* parse this affix: P - prefix, S - suffix */
+       ft = ' ';
+       if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
+       if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
+       if (ft != ' ') {
+          if (dupflags_ini) {
+            for (int i = 0; i < CONTSIZE; i++) dupflags[i] = 0;
+            dupflags_ini = 0;
+          }
+          if (parse_affix(line, ft, afflst, dupflags)) {
+             fclose(afflst);
+             process_pfx_tree_to_list();
+             process_sfx_tree_to_list();
+             return 1;
+          }
+       }
 
     }
     fclose(afflst);
@@ -226,6 +548,29 @@ int  AffixMgr::parse_file(const char * affpath)
     process_pfx_order();
     process_sfx_order();
 
+    // expand wordchars string, based on csutil (for external tokenization)
+
+    char * enc = get_encoding();
+    csconv = get_current_cs(enc);
+    free(enc);
+    enc = NULL;
+
+    char expw[MAXLNLEN];
+    if (wordchars) {
+        strcpy(expw, wordchars);
+        free(wordchars);
+    } else *expw = '\0';
+
+    for (int i = 0; i <= 255; i++) {
+        if ( (csconv[i].cupper != csconv[i].clower) &&
+            (! strchr(expw, (char) i))) {
+                *(expw + strlen(expw) + 1) = '\0';
+                *(expw + strlen(expw)) = (char) i;
+        }
+    }
+
+    wordchars = mystrdup(expw);
+
     return 0;
 }
 
@@ -295,8 +640,6 @@ int AffixMgr::build_pfxtree(AffEntry* pfxptr)
   return 0;
 }
 
-
-
 // we want to be able to quickly access suffix information
 // both by suffix flag, and sorted by the reverse of the
 // suffix string itself; so we need to set up two indexes
@@ -315,7 +658,6 @@ int AffixMgr::build_sfxtree(AffEntry* sfxptr)
   ep->setFlgNxt(ptr);
   sFlag[flg] = (AffEntry *) ep;
 
-
   // next index by affix string
 
   // handle the special case of null affix string
@@ -340,7 +682,6 @@ int AffixMgr::build_sfxtree(AffEntry* sfxptr)
      return 0;
   }
 
-
   // otherwise use binary tree insertion so that a sorted
   // list can easily be generated later
   pptr = NULL;
@@ -363,7 +704,6 @@ int AffixMgr::build_sfxtree(AffEntry* sfxptr)
   return 0;
 }
 
-
 // convert from binary tree to sorted list
 int AffixMgr::process_pfx_tree_to_list()
 {
@@ -405,7 +745,6 @@ AffEntry* AffixMgr::process_sfx_in_order(AffEntry* ptr, AffEntry* nptr)
 }
 
 
-
 // reinitialize the PfxEntry links NextEQ and NextNE to speed searching
 // using the idea of leading subsets this time
 int AffixMgr::process_pfx_order()
@@ -455,9 +794,7 @@ int AffixMgr::process_pfx_order()
     return 0;
 }
 
-
-
-// reinitialize the SfxEntry links NextEQ and NextNE to speed searching
+// initialize the SfxEntry links NextEQ and NextNE to speed searching
 // using the idea of leading subsets this time
 int AffixMgr::process_sfx_order()
 {
@@ -513,14 +850,16 @@ int AffixMgr::process_sfx_order()
 // file affentry.cxx which describes what is going on here
 // in much more detail
 
-void AffixMgr::encodeit(struct affentry * ptr, char * cs)
+int AffixMgr::encodeit(struct affentry * ptr, char * cs)
 {
   unsigned char c;
   int i, j, k;
   unsigned char mbr[MAXLNLEN];
+  w_char wmbr[MAXLNLEN];
+  w_char * wpos = wmbr;
 
   // now clear the conditions array */
-  for (i=0;i<SETSIZE;i++) ptr->conds[i] = (unsigned char) 0;
+  for (i=0;i<SETSIZE;i++) ptr->conds.base[i] = (unsigned char) 0;
 
   // now parse the string to create the conds array */
   int nc = strlen(cs);
@@ -533,7 +872,7 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
   // if no condition just return
   if (strcmp(cs,".")==0) {
     ptr->numconds = 0;
-    return;
+    return 0;
   }
 
   i = 0;
@@ -570,21 +909,21 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
        ec = 1;
     }
 
-    
-    if (ec) {
+  if (ec) {    
+    if (!utf8) {
       if (grp == 1) {
         if (neg == 0) {
           // set the proper bits in the condition array vals for those chars
 	  for (j=0;j<nm;j++) {
 	     k = (unsigned int) mbr[j];
-             ptr->conds[k] = ptr->conds[k] | (1 << n);
+             ptr->conds.base[k] = ptr->conds.base[k] | (1 << n);
           }
 	} else {
 	  // complement so set all of them and then unset indicated ones
-	   for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
+	   for (j=0;j<SETSIZE;j++) ptr->conds.base[j] = ptr->conds.base[j] | (1 << n);
 	   for (j=0;j<nm;j++) {
 	     k = (unsigned int) mbr[j];
-             ptr->conds[k] = ptr->conds[k] & ~(1 << n);
+             ptr->conds.base[k] = ptr->conds.base[k] & ~(1 << n);
 	   }
         }
         neg = 0;
@@ -595,33 +934,115 @@ void AffixMgr::encodeit(struct affentry * ptr, char * cs)
          // but first handle special case of . inside condition
          if (c == '.') {
 	    // wild card character so set them all
-            for (j=0;j<SETSIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
+            for (j=0;j<SETSIZE;j++) ptr->conds.base[j] = ptr->conds.base[j] | (1 << n);
          } else {  
-	    ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n);
+	    ptr->conds.base[(unsigned int) c] = ptr->conds.base[(unsigned int)c] | (1 << n);
          }
       }
       n++;
       ec = 0;
-    }
-
+    } else { // UTF-8 character set
+      if (grp == 1) {
+        ptr->conds.utf8.neg[n] = neg;
+        if (neg == 0) {
+          // set the proper bits in the condition array vals for those chars
+	  for (j=0;j<nm;j++) {
+	     k = (unsigned int) mbr[j];
+             if (k >> 7) {
+                u8_u16(wpos, 1, (char *) mbr + j);
+                wpos++;
+                if ((k & 0xe0) == 0xe0) j+=2; else j++; // 3-byte UTF-8 character
+             } else {
+                ptr->conds.utf8.ascii[k] = ptr->conds.utf8.ascii[k] | (1 << n);
+             }
+          }
+	} else { // neg == 1
+	  // complement so set all of them and then unset indicated ones
+	   for (j=0;j<(SETSIZE/2);j++) ptr->conds.utf8.ascii[j] = ptr->conds.utf8.ascii[j] | (1 << n);
+	   for (j=0;j<nm;j++) {
+	     k = (unsigned int) mbr[j];
+             if (k >> 7) {
+                u8_u16(wpos, 1, (char *) mbr + j);
+                wpos++;
+                if ((k & 0xe0) == 0xe0) j+=2; else j++; // 3-byte UTF-8 character
+             } else {
+                ptr->conds.utf8.ascii[k] = ptr->conds.utf8.ascii[k] & ~(1 << n);
+             }
+	   }
+        }
+        neg = 0;
+        grp = 0;   
+        nm = 0;
+        ptr->conds.utf8.wlen[n] = wpos - wmbr;
+        if ((wpos - wmbr) != 0) {
+            ptr->conds.utf8.wchars[n] = (w_char *) malloc(sizeof(w_char) * (wpos - wmbr));
+            if (!ptr->conds.utf8.wchars[n]) return 1;
+            memcpy(ptr->conds.utf8.wchars[n], wmbr, sizeof(w_char) * (wpos - wmbr));
+            flag_qsort((unsigned short *) ptr->conds.utf8.wchars[n], 0, ptr->conds.utf8.wlen[n]);
+            wpos = wmbr;
+        }
+      } else { // grp == 0
+         // is UTF-8 character?
+         if (c >> 7) {
+            ptr->conds.utf8.wchars[n] = (w_char *) malloc(sizeof(w_char));
+            if (!ptr->conds.utf8.wchars[n]) return 1;
+            ptr->conds.utf8.wlen[n] = 1;
+            u8_u16(ptr->conds.utf8.wchars[n], 1, cs + i);
+            if ((c & 0xe0) == 0xe0) i+=2; else i++; // 3-byte UFT-8 character
+         } else {
+            ptr->conds.utf8.wchars[n] = NULL;
+            // not a group so just set the proper bit for this char
+            // but first handle special case of . inside condition
+            if (c == '.') {
+                ptr->conds.utf8.all[n] = 1;
+	        // wild card character so set them all
+                for (j=0;j<(SETSIZE/2);j++) ptr->conds.utf8.ascii[j] = ptr->conds.utf8.ascii[j] | (1 << n);
+            } else {
+                ptr->conds.utf8.all[n] = 0;
+	        ptr->conds.utf8.ascii[(unsigned int) c] = ptr->conds.utf8.ascii[(unsigned int)c] | (1 << n);
+            }
+         }
+         neg = 0;
+      }
+      n++;
+      ec = 0;
+      neg = 0;
+    }  
+  }
 
     i++;
   }
   ptr->numconds = n;
-  return;
+  return 0;
 }
 
 
 // check word for prefixes
-struct hentry * AffixMgr::prefix_check (const char * word, int len)
+struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compound,
+    const FLAG needflag)
 {
     struct hentry * rv= NULL;
- 
+    // clear the per-call affix bookkeeping members before scanning the prefix tables
+    pfx = NULL;
+    pfxappnd = NULL;
+    sfxappnd = NULL;
+    
     // first handle the special case of 0 length prefixes
     PfxEntry * pe = (PfxEntry *) pStart[0];
     while (pe) {
-       rv = pe->check(word,len);
-       if (rv) return rv;
+        if (
+            // fogemorpheme: when in_compound is IN_CPD_NOT, skip entries whose continuation flags contain onlyincompound
+              ((in_compound != IN_CPD_NOT) || !(pe->getCont() &&
+                  (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) &&
+            // permit prefixes in compounds: when in_compound is IN_CPD_END the entry must carry compoundpermitflag
+              ((in_compound != IN_CPD_END) || (pe->getCont() &&
+                  (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen())))) &&
+            // check prefix: the entry does the matching itself; a non-NULL result ends the scan
+                  (rv = pe->check(word, len, in_compound, needflag))
+              ) {
+                    pfx=(AffEntry *)pe; // BUG: pfx not stateless
+	            return rv;
+	     }
        pe = pe->getNext();
     }
   
@@ -631,8 +1052,19 @@ struct hentry * AffixMgr::prefix_check (const char * word, int len)
 
     while (pptr) {
         if (isSubset(pptr->getKey(),word)) {
-	     rv = pptr->check(word,len);
-             if (rv) return rv;
+             if (
+            // fogemorpheme: same onlyincompound filter as in the zero-length loop
+              ((in_compound != IN_CPD_NOT) || !(pptr->getCont() &&
+                  (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) &&
+            // permit prefixes in compounds: same compoundpermitflag filter as in the zero-length loop
+              ((in_compound != IN_CPD_END) || (pptr->getCont() &&
+                  (TESTAFF(pptr->getCont(), compoundpermitflag, pptr->getContLen())))) &&
+            // check prefix: first entry whose check() succeeds wins
+                  (rv = pptr->check(word, len, in_compound, needflag))
+              ) {
+                    pfx=(AffEntry *)pptr; // BUG: pfx not stateless
+	            return rv;
+	     }
              pptr = pptr->getNextEQ();
         } else {
 	     pptr = pptr->getNextNE();
@@ -642,113 +1074,1574 @@ struct hentry * AffixMgr::prefix_check (const char * word, int len)
     return NULL;
 }
 
-// check if compound word is correctly spelled
-struct hentry * AffixMgr::compound_check (const char * word, int len, char compound_flag)
+// check word for prefixes, delegating to each entry's check_twosfx(); returns the first non-NULL result, else NULL
+struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
+    char in_compound, const FLAG needflag)
 {
-    int i;
     struct hentry * rv= NULL;
+    // NOTE(review): the zero-length loop below returns without recording pfx, unlike the general case - confirm intended
+    pfx = NULL;
+    sfxappnd = NULL;
+    
+    // first handle the special case of 0 length prefixes
+    PfxEntry * pe = (PfxEntry *) pStart[0];
+    
+    while (pe) {
+        rv = pe->check_twosfx(word, len, in_compound, needflag);
+        if (rv) return rv;
+        pe = pe->getNext();
+    }
+  
+    // now handle the general case: walk the list selected by the first byte of word
+    unsigned char sp = *((const unsigned char *)word);
+    PfxEntry * pptr = (PfxEntry *)pStart[sp];
+
+    while (pptr) {
+        if (isSubset(pptr->getKey(),word)) {
+            rv = pptr->check_twosfx(word, len, in_compound, needflag);
+            if (rv) {
+                pfx = (AffEntry *)pptr; // remember which prefix entry produced the hit
+	        return rv;
+	    }
+            pptr = pptr->getNextEQ();
+        } else {
+	     pptr = pptr->getNextNE();
+        }
+    }
+    
+    return NULL;
+}
+
+
+// check word for prefixes and collect the strings returned by every matching entry's check_morph(); returns a mystrdup() copy of the concatenation, or NULL if nothing matched
+char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound,
+    const FLAG needflag)
+{
     char * st;
-    char ch;
+    // result accumulates all matches; appends below are bounded so it can never be overrun
+    char result[MAXLNLEN];
+    result[0] = '\0';
+
+    pfx = NULL;
+    sfxappnd = NULL;
     
-    // handle case of string too short to be a piece of a compound word 
-    if (len < cpdmin) return NULL;
+    // first handle the special case of 0 length prefixes
+    PfxEntry * pe = (PfxEntry *) pStart[0];
+    while (pe) {
+       st = pe->check_morph(word,len,in_compound, needflag);
+       if (st) {
+            strncat(result, st, MAXLNLEN - 1 - strlen(result)); // bounded append: truncate instead of overflowing result[]
+            free(st);
+       }
+       // no early return here: every matching entry contributes to result
+       pe = pe->getNext();
+    }
+  
+    // now handle the general case: walk the list selected by the first byte of word
+    unsigned char sp = *((const unsigned char *)word);
+    PfxEntry * pptr = (PfxEntry *)pStart[sp];
 
-    st = mystrdup(word);
+    while (pptr) {
+        if (isSubset(pptr->getKey(),word)) {
+            st = pptr->check_morph(word,len,in_compound, needflag);
+            if (st) {
+              // fogemorpheme: when in_compound is IN_CPD_NOT, drop matches of entries flagged onlyincompound
+              if ((in_compound != IN_CPD_NOT) || !((pptr->getCont() && 
+                        (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen()))))) {
+                    strncat(result, st, MAXLNLEN - 1 - strlen(result)); // bounded append, see above
+                    pfx = (AffEntry *)pptr;
+                }
+                free(st);
+            }
+            pptr = pptr->getNextEQ();
+        } else {
+	    pptr = pptr->getNextNE();
+        }
+    }
     
-    for (i=cpdmin; i < (len - (cpdmin-1)); i++) {
+    if (*result) return mystrdup(result);
+    return NULL;
+}
+
+
+// check word for prefixes and collect the strings returned by every matching entry's check_twosfx_morph(); returns a mystrdup() copy of the concatenation, or NULL if nothing matched
+char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
+    char in_compound, const FLAG needflag)
+{
+    char * st;
+    // result accumulates all matches; appends below are bounded so it can never be overrun
+    char result[MAXLNLEN];
+    result[0] = '\0';
+
+    pfx = NULL;
+    sfxappnd = NULL;
+    
+    // first handle the special case of 0 length prefixes
+    PfxEntry * pe = (PfxEntry *) pStart[0];
+    while (pe) {
+        st = pe->check_twosfx_morph(word,len,in_compound, needflag);
+        if (st) {
+            strncat(result, st, MAXLNLEN - 1 - strlen(result)); // bounded append: truncate instead of overflowing result[]
+            free(st);
+        }
+        pe = pe->getNext();
+    }
+  
+    // now handle the general case: walk the list selected by the first byte of word
+    unsigned char sp = *((const unsigned char *)word);
+    PfxEntry * pptr = (PfxEntry *)pStart[sp];
+
+    while (pptr) {
+        if (isSubset(pptr->getKey(),word)) {
+            st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
+            if (st) {
+                strncat(result, st, MAXLNLEN - 1 - strlen(result)); // bounded append, see above
+                free(st);
+                pfx = (AffEntry *)pptr; // last matching entry is the one left in pfx
+            }
+            pptr = pptr->getNextEQ();
+        } else {
+	    pptr = pptr->getNextNE();
+        }
+    }
+    
+    if (*result) return mystrdup(result);
+    return NULL;
+}
 
+
+// Is word a non compound with a REP substitution (see checkcompoundrep)?
+int AffixMgr::cpdrep_check(const char * word, int wl)
+{
+  char candidate[MAXLNLEN];
+  const char * r;
+  int lenr, lenp;
+  // returns 1 if replacing one occurrence of some reptable pattern by its pattern2 yields a word accepted by candidate_check(), else 0
+  if ((wl < 2) || !numrep) return 0;
+
+  for (int i=0; i < numrep; i++ ) {
+      r = word;
+      lenr = strlen(reptable[i].pattern2);
+      lenp = strlen(reptable[i].pattern);
+      // search every occurrence of the pattern in the word
+      while ((r=strstr(r, reptable[i].pattern)) != NULL) {
+	  // validate sizes before any copy: both word and the rewritten candidate must fit in candidate[]
+	  if ((strlen(word) >= MAXLNLEN) || (r-word + lenr + strlen(r+lenp) >= MAXLNLEN)) break;
+	  strcpy(candidate, word);
+	  strcpy(candidate+(r-word),reptable[i].pattern2);
+	  strcpy(candidate+(r-word)+lenr, r+lenp);
+          if (candidate_check(candidate,strlen(candidate))) return 1;
+          r++; // search for the next letter
+      }
+   }
+   return 0;
+}
+
+// forbid compoundings when there are special patterns at word bound; pos is the split offset in word, returns 1 to forbid and 0 otherwise
+int AffixMgr::cpdpat_check(const char * word, int pos)
+{
+  int len;
+  for (int i = 0; i < numcheckcpd; i++) {
+      if (isSubset(checkcpdtable[i].pattern2, word + pos) && // pattern2 is tested against the text starting at pos
+        (len = strlen(checkcpdtable[i].pattern)) && (pos > len) && // pattern must be non-empty and strictly shorter than the text before pos
+        (strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)) return 1; // pattern must end exactly at pos
+  }
+  return 0;
+}
+
+// forbid compounding with neighbouring upper and lower case characters at word bounds; reads the characters just before and at pos (so pos must be >= 1), returns 1 to forbid and 0 otherwise
+int AffixMgr::cpdcase_check(const char * word, int pos)
+{
+  if (utf8) {
+      w_char u, w;
+      const char * p;
+      u8_u16(&u, 1, word + pos);
+      for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--); // step back over UTF-8 continuation bytes to the previous character's lead byte
+      u8_u16(&w, 1, p);
+      unsigned short a = (u.h << 8) + u.l;
+      unsigned short b = (w.h << 8) + w.l;
+      if (utf_tbl[a].cletter && utf_tbl[b].cletter && // both neighbours must be letters (the second test used to repeat a)
+          ((utf_tbl[a].cupper == a) || (utf_tbl[b].cupper == b))) return 1;
+  } else {
+      unsigned char a = *(word + pos - 1);
+      unsigned char b = *(word + pos);
+      if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-')) return 1; // a hyphen on either side never triggers the rule
+  }
+  return 0;
+}
+
+// check compound patterns: stores rv at (*words)[wnum] (installing def as the array when *words is NULL) and returns 1 if the flags of words 0..wnum fit some defcpdtable entry; on failure the slot is cleared, a freshly installed *words is reset to NULL, and 0 is returned
+int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** def, char all)
+{
+  short btpp[MAXWORDLEN]; // metacharacter (*, ?) positions for backtracking
+  short btwp[MAXWORDLEN]; // word positions for metacharacters
+  int btnum[MAXWORDLEN]; // number of matched characters in metacharacter positions
+  short bt = 0;  
+  int i;
+  int ok;
+  int w = 0; // set to 1 when this call installed def into *words, so it can be undone on failure
+  if (!*words) {
+    w = 1;
+    *words = def;
+  }
+  (*words)[wnum] = rv;
+
+  for (i = 0; i < numdefcpd; i++) {
+    int pp = 0; // pattern position
+    int wp = 0; // "words" position
+    int ok2;
+    ok = 1;
+    ok2 = 1;
+    do {
+      while ((pp < defcpdtable[i].len) && (wp <= wnum)) {
+        if (((pp+1) < defcpdtable[i].len) &&
+          ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) {
+            int wend = (defcpdtable[i].def[pp+1] == '?') ? wp : wnum; // '?' may consume at most one word, '*' any number up to wnum
+            ok2 = 1;
+            pp+=2;
+            btpp[bt] = pp;
+            btwp[bt] = wp;
+            while (wp <= wend) {
+                if (!(*words)[wp]->alen || 
+                  !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp-2], (*words)[wp]->alen)) {
+                    ok2 = 0;
+                    break;
+                }
+                wp++;
+            }
+            if (wp <= wnum) ok2 = 0;
+            btnum[bt] = wp - btwp[bt];
+            if (btnum[bt] > 0) bt++; // only keep a backtracking record when the metacharacter consumed something
+            if (ok2) break;
+        } else {
+            ok2 = 1;
+            if (!(*words)[wp] || !(*words)[wp]->alen || 
+              !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp], (*words)[wp]->alen)) {
+                ok = 0;
+                break;
+            }
+            pp++;
+            wp++;
+            if ((defcpdtable[i].len == pp) && !(wp > wnum)) ok = 0; // pattern exhausted while words remain
+        }
+      }
+    if (ok && ok2) { 
+        int r = pp;
+        while ((defcpdtable[i].len > r) && ((r+1) < defcpdtable[i].len) &&
+            ((defcpdtable[i].def[r+1] == '*') || (defcpdtable[i].def[r+1] == '?'))) r+=2; // skip trailing items that carry a metacharacter
+        if (defcpdtable[i].len <= r) return 1;
+    }    
+    // backtrack: give back one word from the most recent metacharacter and retry from there
+    if (bt) do {
+        ok = 1;
+        btnum[bt - 1]--;
+        pp = btpp[bt - 1];
+        wp = btwp[bt - 1] + btnum[bt - 1];
+    } while ((btnum[bt - 1] < 0) && --bt);
+  } while (bt);
+
+  if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) return 1; // without "all", the pattern need not be fully consumed
+  // check zero ending
+  while (ok && ok2 && (defcpdtable[i].len > pp) && ((pp+1) < defcpdtable[i].len) &&
+    ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) pp+=2;
+  if (ok && ok2 && (defcpdtable[i].len <= pp)) return 1;
+  }
+  (*words)[wnum] = NULL;
+  if (w) *words = NULL;
+  return 0;
+}
+
+inline int AffixMgr::candidate_check(const char * word, int len)
+{
+  struct hentry * rv=NULL;
+  // returns 1 if word is found by lookup() or accepted by affix_check(), otherwise 0
+  rv = lookup(word);
+  if (rv) return 1;
+
+//  rv = prefix_check(word,len,1);
+//  if (rv) return 1;
+  
+  rv = affix_check(word,len);
+  if (rv) return 1;
+  return 0;
+}
+
+// calculate number of syllable for compound-checking: counts the characters of word that occur in the vowel set
+int AffixMgr::get_syllable(const char * word, int wlen)
+{
+    if (cpdmaxsyllable==0) return 0; // no syllable limit configured: nothing to count
+    
+    int num=0;
+
+    if (!utf8) {
+        for (int i=0; i<wlen; i++) {
+	    if (strchr(cpdvowels, word[i])) num++;
+        }
+    } else if (cpdvowels_utf16) {
+        w_char w[MAXWORDUTF8LEN];
+        int i = u8_u16(w, MAXWORDUTF8LEN, word); // wlen is not used on this path; the loop runs over the count returned by u8_u16
+        for (; i; i--) {
+            if (flag_bsearch((unsigned short *) cpdvowels_utf16,
+                ((unsigned short *) w)[i - 1], cpdvowels_utf16_len)) num++;
+        }
+    }
+    return num;
+}
+
+// check if compound word is correctly spelled: for each split offset i in [cmin, cmax) the first part word[0..i) is validated, then word+i is tried as a root, as an affixed form, and finally (recursively) as a further compound; returns the entry found for the last part, or NULL
+// hu_mov_rule = spec. Hungarian rule (XXX)
+struct hentry * AffixMgr::compound_check(const char * word, int len, 
+    short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words = NULL,
+    char hu_mov_rule = 0, int * cmpdstemnum = NULL, int * cmpdstem = NULL, char is_sug = 0)
+{
+    int i, oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
+    int oldcmpdstemnum = 0;
+    struct hentry * rv = NULL;
+    struct hentry * rv_first;
+    struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
+    char st [MAXWORDUTF8LEN + 4];
+    char ch;
+    int cmin;
+    int cmax;
+    
+    int checked_prefix;
+
+#ifdef HUNSTEM
+    if (cmpdstemnum) {
+	if (wordnum == 0) {
+	    *cmpdstemnum = 1;
+	} else {
+	    (*cmpdstemnum)++;
+	}
+    }
+#endif
+    if (utf8) {
+        for (cmin = 0, i = 0; (i < cpdmin) && word[cmin]; i++) { // cmin: byte offset after the first cpdmin characters
+          cmin++;
+          for (; (word[cmin] & 0xc0) == 0x80; cmin++);
+        }
+        for (cmax = len, i = 0; (i < (cpdmin - 1)) && cmax; i++) { // cmax: byte offset before the last cpdmin-1 characters
+          cmax--;
+          for (; (word[cmax] & 0xc0) == 0x80; cmax--);
+        }
+    } else {
+        cmin = cpdmin;
+        cmax = len - cpdmin + 1;
+    }
+
+    strcpy(st, word); // working copy; st[i] is NUL-ed per split and restored afterwards. NOTE(review): no length check against sizeof(st) here - confirm callers bound len
+
+    for (i = cmin; i < cmax; i++) {
+
+        oldnumsyllable = numsyllable;
+        oldwordnum = wordnum;
+        checked_prefix = 0;
+
+        // go to end of the UTF-8 character
+        if (utf8) {
+            for (; (st[i] & 0xc0) == 0x80; i++);
+            if (i >= cmax) return NULL;
+        }
+
+	
         ch = st[i];
-	st[i] = '\0';
+        st[i] = '\0';
+
+        sfx = NULL;
+        pfx = NULL;
+        
+	// FIRST WORD
+	
+        rv = lookup(st); // perhaps without prefix
+
+        // search homonym with compound flag
+        while ((rv) && !hu_mov_rule &&
+            ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
+		!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+	          (compoundbegin && !wordnum &&
+                        TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+                  (compoundmiddle && wordnum && !words &&
+                    TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
+                  (numdefcpd &&
+                    ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
+                    (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
+                  ))) {
+            rv = rv->next_homonym;
+        }
 
-	rv = lookup(st);
-        if (!rv) rv = affix_check(st,i);
+        if (!rv) {
+            if (compoundflag && 
+             !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
+                if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
+                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
+                    ((SfxEntry*)sfx)->getCont() &&
+                        ((compoundforbidflag && TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag, 
+                            ((SfxEntry*)sfx)->getContLen())) || (compoundend &&
+                        TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend, 
+                            ((SfxEntry*)sfx)->getContLen())))) {
+                        rv = NULL;
+                }
+            }
+            if (rv ||
+              (((wordnum == 0) && compoundbegin &&
+                ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+                (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
+              ((wordnum > 0) && compoundmiddle &&
+                ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+                (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
+              ) checked_prefix = 1;
+        // else check forbiddenwords and pseudoroot
+	} else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+            TESTAFF(rv->astr, pseudoroot, rv->alen) || 
+            (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
+             )) {
+                st[i] = ch;
+                continue;
+	}
 
-	if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) {
-	    rv = lookup((word+i));
-	    if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) {
-		free(st);
-		return rv;
+            // check non_compound flag in suffix and prefix
+            if ((rv) && !hu_mov_rule &&
+                ((pfx && ((PfxEntry*)pfx)->getCont() &&
+                    TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag, 
+                        ((PfxEntry*)pfx)->getContLen())) ||
+                (sfx && ((SfxEntry*)sfx)->getCont() &&
+                    TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag, 
+                        ((SfxEntry*)sfx)->getContLen())))) {
+                    rv = NULL;
+            }
+
+            // check compoundend flag in suffix and prefix
+            if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
+                ((pfx && ((PfxEntry*)pfx)->getCont() &&
+                    TESTAFF(((PfxEntry*)pfx)->getCont(), compoundend, 
+                        ((PfxEntry*)pfx)->getContLen())) ||
+                (sfx && ((SfxEntry*)sfx)->getCont() &&
+                    TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend, 
+                        ((SfxEntry*)sfx)->getContLen())))) {
+                    rv = NULL;
+            }
+	    
+            // check compoundmiddle flag in suffix and prefix
+            if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
+                ((pfx && ((PfxEntry*)pfx)->getCont() &&
+                    TESTAFF(((PfxEntry*)pfx)->getCont(), compoundmiddle, 
+                        ((PfxEntry*)pfx)->getContLen())) ||
+                (sfx && ((SfxEntry*)sfx)->getCont() &&
+                    TESTAFF(((SfxEntry*)sfx)->getCont(), compoundmiddle, 
+                        ((SfxEntry*)sfx)->getContLen())))) {
+                    rv = NULL;
+            }	    
+
+	// check forbiddenwords
+	if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+            (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
+                return NULL;
+            }
+
+	// increment word number, if the second root has a compoundroot flag
+	if ((rv) && compoundroot && 
+	    (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+		wordnum++;
+	}
+
+	// first word is acceptable in compound words?
+	if (((rv) && 
+	  ( checked_prefix || (words && words[wnum]) ||
+	    (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+	    ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+	    ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))// ||
+//            (numdefcpd && )
+
+// LANG_hu section: spec. Hungarian rule
+	    || ((langnum == LANG_hu) && hu_mov_rule && (
+		    TESTAFF(rv->astr, 'F', rv->alen) || // XXX hardwired Hungarian dictionary codes
+		    TESTAFF(rv->astr, 'G', rv->alen) ||
+		    TESTAFF(rv->astr, 'H', rv->alen)
+		)
+	      )
+// END of LANG_hu section
+	  )
+	  && ! (( checkcompoundtriple && // test triple letters
+                   (word[i-1]==word[i]) && (
+                      ((i>1) && (word[i-1]==word[i-2])) || 
+                      ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
+		   )
+               ) ||
+               ( 
+                 // test CHECKCOMPOUNDPATTERN
+	         numcheckcpd && cpdpat_check(word, i)
+	       ) ||
+               ( 
+	         checkcompoundcase && cpdcase_check(word, i)
+               ))
+         )
+// LANG_hu section: spec. Hungarian rule
+         || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
+              (sfx && ((SfxEntry*)sfx)->getCont() && ( // XXX hardwired Hungarian dic. codes
+                        TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) 'x', ((SfxEntry*)sfx)->getContLen()) ||
+                        TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) '%', ((SfxEntry*)sfx)->getContLen())
+                    )                
+               )
+	     )
+// END of LANG_hu section
+         ) {
+
+// LANG_hu section: spec. Hungarian rule
+	    if (langnum == LANG_hu) {
+                // calculate syllable number of the word            
+                numsyllable += get_syllable(st, i);
+
+	        // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
+	        if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
+            }
+// END of LANG_hu section
+
+#ifdef HUNSTEM
+	    if (cmpdstem) cmpdstem[*cmpdstemnum - 1] = i;
+#endif
+
+	    // NEXT WORD(S)
+	    rv_first = rv;
+	    rv = lookup((word+i)); // perhaps without prefix
+
+        // search homonym with compound flag
+        while ((rv) && ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
+			!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+			  (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
+                           (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
+            rv = rv->next_homonym;
+        }
+
+            if (rv && words && words[wnum + 1]) return rv;
+
+	    oldnumsyllable2 = numsyllable;
+	    oldwordnum2 = wordnum;
+
+// LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
+	    if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
+		numsyllable--;
 	    }
-	    rv = affix_check((word+i),strlen(word+i));
-	    if ((rv) && (TESTAFF(rv->astr, compound_flag, rv->alen))) {
-		free(st);
-		return rv;
+// END of LANG_hu section
+
+	    // increment word number, if the second root has a compoundroot flag
+	    if ((rv) && (compoundroot) && 
+		(TESTAFF(rv->astr, compoundroot, rv->alen))) {
+		    wordnum++;
+	    }
+
+	    // check forbiddenwords
+	    if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+               (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
+
+	    // second word is acceptable, as a root?
+	    // hungarian conventions: compounding is acceptable,
+	    // when compound forms consist of 2 words, or if more,
+	    // then the syllable number of root words must be 6, or lesser.
+
+	    if ((rv) && (
+	              (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+	              (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
+	            )
+		&& (
+		      ((cpdwordmax==0) || (wordnum+1<cpdwordmax)) || // NOTE(review): the limits are OR-ed and cpdmaxsyllable==0 makes this whole test true - confirm cpdwordmax is meant to be ignored then
+		      ((cpdmaxsyllable==0) || 
+		          (numsyllable + get_syllable(rv->word,rv->wlen)<=cpdmaxsyllable))
+		    )
+		&& (
+		     (!checkcompounddup || (rv != rv_first))
+		   )
+		)
+		 {
+		      // forbid compound word, if it is a non compound word with typical fault
+		      if (checkcompoundrep && cpdrep_check(word,len)) return NULL;
+		      return rv;
+	    }
+
+	    numsyllable = oldnumsyllable2 ;
+	    wordnum = oldwordnum2;
+
+	    // perhaps second word has prefix or/and suffix
+            sfx = NULL;
+	    sfxflag = FLAG_NULL;
+	    rv = (compoundflag) ? affix_check((word+i),strlen(word+i), compoundflag, IN_CPD_END) : NULL;
+            if (!rv && compoundend) {
+                sfx = NULL;
+                pfx = NULL;
+                rv = affix_check((word+i),strlen(word+i), compoundend, IN_CPD_END);
+            }
+            
+            if (!rv && numdefcpd && words) {
+                rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
+                if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) return rv;
+            }
+
+            // check non_compound flag in suffix and prefix
+            if ((rv) && 
+                ((pfx && ((PfxEntry*)pfx)->getCont() &&
+                    TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag, 
+                        ((PfxEntry*)pfx)->getContLen())) ||
+                (sfx && ((SfxEntry*)sfx)->getCont() &&
+                    TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag, 
+                        ((SfxEntry*)sfx)->getContLen())))) {
+                    rv = NULL;
+            }
+
+	    // check forbiddenwords
+	    if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+               (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
+
+	    // pfxappnd = prefix of word+i, or NULL
+	    // calculate syllable number of prefix.
+	    // hungarian convention: when syllable number of prefix is more,
+	    // than 1, the prefix+word counts as two words.
+
+            if (langnum == LANG_hu) {
+	        // calculate syllable number of the word
+	        numsyllable += get_syllable(word + i, strlen(word + i));
+                
+                // - affix syllable num.
+                // XXX only second suffix (inflections, not derivations)
+                if (sfxappnd) {
+                    char * tmp = myrevstrdup(sfxappnd);
+	            numsyllable -= get_syllable(tmp, strlen(tmp));
+                    free(tmp);
+                }
+                
+                // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
+	        if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
+
+	        // increment syllable num, if last word has a SYLLABLENUM flag
+	        // and the suffix is beginning `s'
+            
+	        if (cpdsyllablenum) {
+	            switch (sfxflag) {
+		        case 'c': { numsyllable+=2; break; }
+		        case 'J': { numsyllable += 1; break; }
+                        case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; } // rv may be NULL here (no match, or cleared by the compoundforbidflag test above)
+                    }
+                }
+            }
+            
+	    // increment word number, if the second word has a compoundroot flag
+	    if ((rv) && (compoundroot) && 
+		(TESTAFF(rv->astr, compoundroot, rv->alen))) {
+		    wordnum++;
+	    }
+
+	    // second word is acceptable, as a word with prefix or/and suffix?
+	    // hungarian conventions: compounding is acceptable,
+	    // when compound forms consist 2 word, otherwise
+	    // the syllable number of root words is 6, or lesser.
+	    if ((rv) && 
+                    (
+		      ((cpdwordmax ==0 ) || (wordnum + 1 < cpdwordmax)) || // NOTE(review): same OR-ed limit test as for the root case above - confirm
+		      ((cpdmaxsyllable == 0) || 
+		          (numsyllable <= cpdmaxsyllable))
+		    )
+		&& (
+		   (!checkcompounddup || (rv != rv_first))
+		   )) {
+		    // forbid compound word, if it is a non compound word with typical fault
+		    if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
+		    return rv;
+	    }
+
+	    numsyllable = oldnumsyllable2;
+	    wordnum = oldwordnum2;
+#ifdef HUNSTEM
+	    if (cmpdstemnum) oldcmpdstemnum = *cmpdstemnum;
+#endif
+	    // perhaps second word is a compound word (recursive call)
+	    if (wordnum < maxwordnum) {
+		rv = compound_check((word+i),strlen(word+i), wordnum+1,
+		     numsyllable, maxwordnum, wnum + 1, words,
+                     0, cmpdstemnum, cmpdstem, is_sug);
+	    } else {
+		rv=NULL;
 	    }
-	    rv = compound_check((word+i),strlen(word+i),compound_flag); 
 	    if (rv) {
-		free(st);
+		// forbid compound word, if it is a non compound word with typical fault
+		if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
 		return rv;
+	    } else {
+#ifdef HUNSTEM
+	    if (cmpdstemnum) *cmpdstemnum = oldcmpdstemnum;
+#endif
 	    }
-	    
 	}
         st[i] = ch;
+	wordnum = oldwordnum; // undo per-split counter changes before trying the next split offset
+	numsyllable = oldnumsyllable;
     }
-    free(st);
+    
     return NULL;
 }    
 
 
+// check if compound word is correctly spelled
+// hu_mov_rule = spec. Hungarian rule (XXX)
+int AffixMgr::compound_check_morph(const char * word, int len, 
+    short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
+    char hu_mov_rule = 0, char ** result = NULL, char * partresult = NULL)
+{
+    int i, oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
+    int ok = 0;
+
+    struct hentry * rv = NULL;
+    struct hentry * rv_first;
+    struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
+    char st [MAXWORDUTF8LEN + 4];
+    char ch;
+    
+    int checked_prefix;
+    char presult[MAXLNLEN];
+
+    int cmin;
+    int cmax;
+    
+    if (utf8) {
+        for (cmin = 0, i = 0; (i < cpdmin) && word[cmin]; i++) {
+          cmin++;
+          for (; (word[cmin] & 0xc0) == 0x80; cmin++);
+        }
+        for (cmax = len, i = 0; (i < (cpdmin - 1)) && cmax; i++) {
+          cmax--;
+          for (; (word[cmax] & 0xc0) == 0x80; cmax--);
+        }
+    } else {
+        cmin = cpdmin;
+        cmax = len - cpdmin + 1;
+    }
+
+    strcpy(st, word);
+
+    for (i = cmin; i < cmax; i++) {
+	oldnumsyllable = numsyllable;
+	oldwordnum = wordnum;
+        checked_prefix = 0;
+
+        // go to end of the UTF-8 character
+        if (utf8) {
+            for (; (st[i] & 0xc0) == 0x80; i++);
+            if (i >= cmax) return 0;
+        }
+	
+        ch = st[i];
+	st[i] = '\0';
+        sfx = NULL;
+
+	// FIRST WORD
+        *presult = '\0';
+        if (partresult) strcat(presult, partresult);
+	
+	rv = lookup(st); // perhaps without prefix
+
+        // search homonym with compound flag
+        while ((rv) && !hu_mov_rule && 
+            ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
+		!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+	        (compoundbegin && !wordnum &&
+                        TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+                (compoundmiddle && wordnum && !words &&
+                    TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
+                  (numdefcpd &&
+                    ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
+                    (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
+                  ))) {
+            rv = rv->next_homonym;
+        }
+
+        if (rv)	 {
+            if (rv->description) {
+                if ((!rv->astr) || !TESTAFF(rv->astr, lemma_present, rv->alen))
+					strcat(presult, st);
+                strcat(presult, rv->description);
+            }
+        }
+        
+        if (!rv) {
+            if (compoundflag && 
+             !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
+                if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
+                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
+                    ((SfxEntry*)sfx)->getCont() &&
+                        ((compoundforbidflag && TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag, 
+                            ((SfxEntry*)sfx)->getContLen())) || (compoundend &&
+                        TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend, 
+                            ((SfxEntry*)sfx)->getContLen())))) {
+                        rv = NULL;
+                }
+            }
+            
+            if (rv ||
+              (((wordnum == 0) && compoundbegin &&
+                ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+                (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
+              ((wordnum > 0) && compoundmiddle &&
+                ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+                (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
+              ) {
+                //char * p = prefix_check_morph(st, i, 0, compound);
+                char * p = NULL;
+                if (compoundflag) p = affix_check_morph(st, i, compoundflag);
+                if (!p || (*p == '\0')) {
+                   if ((wordnum == 0) && compoundbegin) {
+                     p = affix_check_morph(st, i, compoundbegin);
+                   } else if ((wordnum > 0) && compoundmiddle) {
+                     p = affix_check_morph(st, i, compoundmiddle);                   
+                   }
+                }
+                if (*p != '\0') {
+                    line_uniq(p);
+                    if (strchr(p, '\n')) {
+                        strcat(presult, "(");
+                        strcat(presult, line_join(p, '|'));
+                        strcat(presult, ")");
+                      } else {
+                        strcat(presult, p);
+                      }
+                }
+                if (presult[strlen(presult) - 1] == '\n') {
+                    presult[strlen(presult) - 1] = '\0';
+                }
+                checked_prefix = 1;
+                //strcat(presult, "+");
+            }
+        // else check forbiddenwords
+	} else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+            TESTAFF(rv->astr, pseudoroot, rv->alen))) {
+                st[i] = ch;
+                continue;
+	}
+
+            // check non_compound flag in suffix and prefix
+            if ((rv) && !hu_mov_rule &&
+                ((pfx && ((PfxEntry*)pfx)->getCont() &&
+                    TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag, 
+                        ((PfxEntry*)pfx)->getContLen())) ||
+                (sfx && ((SfxEntry*)sfx)->getCont() &&
+                    TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag, 
+                        ((SfxEntry*)sfx)->getContLen())))) {
+                    continue;
+            }
+
+            // check compoundend flag in suffix and prefix
+            if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
+                ((pfx && ((PfxEntry*)pfx)->getCont() &&
+                    TESTAFF(((PfxEntry*)pfx)->getCont(), compoundend, 
+                        ((PfxEntry*)pfx)->getContLen())) ||
+                (sfx && ((SfxEntry*)sfx)->getCont() &&
+                    TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend, 
+                        ((SfxEntry*)sfx)->getContLen())))) {
+                    continue;
+            }
+
+            // check compoundmiddle flag in suffix and prefix
+            if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
+                ((pfx && ((PfxEntry*)pfx)->getCont() &&
+                    TESTAFF(((PfxEntry*)pfx)->getCont(), compoundmiddle, 
+                        ((PfxEntry*)pfx)->getContLen())) ||
+                (sfx && ((SfxEntry*)sfx)->getCont() &&
+                    TESTAFF(((SfxEntry*)sfx)->getCont(), compoundmiddle, 
+                        ((SfxEntry*)sfx)->getContLen())))) {
+                    rv = NULL;
+            }	    
+
+	// check forbiddenwords
+	if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) continue;
+
+	// increment word number, if the second root has a compoundroot flag
+	if ((rv) && (compoundroot) && 
+	    (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+		wordnum++;
+	}
+
+	// first word is acceptable in compound words?
+	if (((rv) && 
+	  ( checked_prefix || (words && words[wnum]) ||
+	    (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+	    ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+	    ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen)) 
+// LANG_hu section: spec. Hungarian rule
+	    || ((langnum == LANG_hu) &&	// hu_mov_rule
+	        hu_mov_rule && (
+		    TESTAFF(rv->astr, 'F', rv->alen) ||
+		    TESTAFF(rv->astr, 'G', rv->alen) ||
+		    TESTAFF(rv->astr, 'H', rv->alen)
+		)
+	      )
+// END of LANG_hu section
+	  )
+	  && ! (( checkcompoundtriple && // test triple letters
+                   (word[i-1]==word[i]) && (
+                      ((i>1) && (word[i-1]==word[i-2])) || 
+                      ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
+		   )
+               ) ||
+	       (
+	           // test CHECKCOMPOUNDPATTERN
+                   numcheckcpd && cpdpat_check(word, i)
+	       ) ||
+               ( 
+	         checkcompoundcase && cpdcase_check(word, i)
+               ))
+         )
+// LANG_hu section: spec. Hungarian rule
+         || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
+              (sfx && ((SfxEntry*)sfx)->getCont() && (
+                        TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) 'x', ((SfxEntry*)sfx)->getContLen()) ||
+                        TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) '%', ((SfxEntry*)sfx)->getContLen())
+                    )                
+               )
+	     )
+// END of LANG_hu section
+         ) {
+
+// LANG_hu section: spec. Hungarian rule
+            if (langnum == LANG_hu) {
+	        // calculate syllable number of the word
+	        numsyllable += get_syllable(st, i);
+
+	        // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
+	        if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
+            }
+// END of LANG_hu section
+
+	    // NEXT WORD(S)
+	    rv_first = rv;
+	    rv = lookup((word+i)); // perhaps without prefix
+
+        // search homonym with compound flag
+        while ((rv) && ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
+			!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+			  (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
+                           (numdefcpd && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
+            rv = rv->next_homonym;
+        }
+
+            if (rv && words && words[wnum + 1]) {
+                  strcat(*result, presult);
+                  if (complexprefixes && rv->description) strcat(*result, rv->description);
+                  if (rv->description && ((!rv->astr) || 
+		     !TESTAFF(rv->astr, lemma_present, rv->alen)))
+			strcat(*result, rv->word);
+                  if (!complexprefixes && rv->description) strcat(*result, rv->description);
+                  strcat(*result, "\n");
+                  ok = 1;
+                  return 0;
+            }
+
+	    oldnumsyllable2 = numsyllable;
+	    oldwordnum2 = wordnum;
+
+// LANG_hu section: spec. Hungarian rule
+	    if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
+		numsyllable--;
+	    }
+// END of LANG_hu section
+	    // increment word number, if the second root has a compoundroot flag
+	    if ((rv) && (compoundroot) && 
+		(TESTAFF(rv->astr, compoundroot, rv->alen))) {
+		    wordnum++;
+	    }
+
+	    // check forbiddenwords
+	    if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) {
+                st[i] = ch;
+                continue;
+            }
+                    
+	    // second word is acceptable, as a root?
+	    // hungarian conventions: compounding is acceptable,
+	    // when compound forms consist of 2 words, or if more,
+	    // then the syllable number of root words must be 6, or lesser.
+	    if ((rv) && (
+	              (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+	              (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
+	            )
+		&& (
+		      ((cpdwordmax==0) || (wordnum+1<cpdwordmax)) || 
+		      ((cpdmaxsyllable==0) || 
+		          (numsyllable+get_syllable(rv->word,rv->wlen)<=cpdmaxsyllable))
+		    )
+		&& (
+		     (!checkcompounddup || (rv != rv_first))
+		   )
+		)
+		 {
+		      // bad compound word
+                      strcat(*result, presult);
+					  
+                      if (rv->description) {
+                        if (complexprefixes) strcat(*result, rv->description);
+                        if ((!rv->astr) || !TESTAFF(rv->astr, lemma_present, rv->alen))
+					       strcat(*result, rv->word);
+                        if (!complexprefixes) strcat(*result, rv->description);
+                      }
+                      strcat(*result, "\n");
+		              ok = 1;
+	    }
+
+	    numsyllable = oldnumsyllable2 ;
+	    wordnum = oldwordnum2;
+
+	    // perhaps second word has prefix or/and suffix
+            sfx = NULL;
+	    sfxflag = FLAG_NULL;
+
+            if (compoundflag) rv = affix_check((word+i),strlen(word+i), compoundflag); else rv = NULL;
+
+            if (!rv && compoundend) {
+                sfx = NULL;
+                pfx = NULL;
+                rv = affix_check((word+i),strlen(word+i), compoundend);
+            }
+
+            if (!rv && numdefcpd && words) {
+                rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
+                if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
+                      char * m = NULL;
+                      if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
+                      if ((!m || *m == '\0') && compoundend)
+                            m = affix_check_morph((word+i),strlen(word+i), compoundend);
+                      strcat(*result, presult);
+                      line_uniq(m);
+                      if (strchr(m, '\n')) {
+                            strcat(*result, "(");
+                            strcat(*result, line_join(m, '|'));
+                            strcat(*result, ")");
+                      } else {
+                            strcat(*result, m);
+                      }
+                      free(m);
+                      strcat(*result, "\n");
+                      ok = 1;
+                }
+            }
+
+            // check non_compound flag in suffix and prefix
+            if ((rv) && 
+                ((pfx && ((PfxEntry*)pfx)->getCont() &&
+                    TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag, 
+                        ((PfxEntry*)pfx)->getContLen())) ||
+                (sfx && ((SfxEntry*)sfx)->getCont() &&
+                    TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag, 
+                        ((SfxEntry*)sfx)->getContLen())))) {
+                    rv = NULL;
+            }
+
+	    // check forbiddenwords
+	    if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen))
+	    	    && (! TESTAFF(rv->astr, pseudoroot, rv->alen))) {
+                        st[i] = ch;
+                        continue;
+                    }
+
+            if (langnum == LANG_hu) {
+                // calculate syllable number of the word
+                numsyllable += get_syllable(word + i, strlen(word + i));
+
+                // - affix syllable num.
+                // XXX only second suffix (inflections, not derivations)
+                if (sfxappnd) {
+                    char * tmp = myrevstrdup(sfxappnd);
+	            numsyllable -= get_syllable(tmp, strlen(tmp));
+                    free(tmp);
+                }
+
+                // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
+                if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
+
+                // increment syllable num, if last word has a SYLLABLENUM flag
+                // and the suffix is beginning `s'
+
+	        if (cpdsyllablenum) {
+	            switch (sfxflag) {
+		        case 'c': { numsyllable+=2; break; }
+		        case 'J': { numsyllable += 1; break; }
+                        case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
+	            }
+	        }
+            }
+
+	    // increment word number, if the second word has a compoundroot flag
+	    if ((rv) && (compoundroot) && 
+		(TESTAFF(rv->astr, compoundroot, rv->alen))) {
+		    wordnum++;
+	    }
+	    // second word is acceptable, as a word with prefix or/and suffix?
+	    // hungarian conventions: compounding is acceptable,
+	    // when compound forms consist 2 word, otherwise
+	    // the syllable number of root words is 6, or lesser.
+	    if ((rv) && 
+                    (
+		      ((cpdwordmax==0) || (wordnum+1<cpdwordmax)) || 
+		      ((cpdmaxsyllable==0) || 
+		          (numsyllable <= cpdmaxsyllable))
+		    )
+		&& (
+		   (!checkcompounddup || (rv != rv_first))
+		   )) {
+                      char * m = NULL;
+                      if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
+                      if ((!m || *m == '\0') && compoundend)
+                            m = affix_check_morph((word+i),strlen(word+i), compoundend);
+                      strcat(*result, presult);
+                      line_uniq(m);
+                      if (strchr(m, '\n')) {
+                            strcat(*result, "(");
+                            strcat(*result, line_join(m, '|'));
+                            strcat(*result, ")");
+                      } else {
+                            strcat(*result, m);
+                      }
+                      free(m);
+                      strcat(*result, "\n");
+                      ok = 1;
+	    }
+
+	    numsyllable = oldnumsyllable2;
+	    wordnum = oldwordnum2;
+
+	    // perhaps second word is a compound word (recursive call)
+	    if ((wordnum < maxwordnum) && (ok == 0)) {
+			compound_check_morph((word+i),strlen(word+i), wordnum+1, 
+                             numsyllable, maxwordnum, wnum + 1, words, 0, result, presult);
+	    } else {
+		rv=NULL;
+	    }
+	}
+        st[i] = ch;
+	wordnum = oldwordnum;
+	numsyllable = oldnumsyllable;
+    }
+    return 0;
+}    
+
+
+
 
 // check word for suffixes
+//
+// Tries to analyse `word` (length `len`) as a stem plus one suffix entry.
+// Zero-length suffix entries (sStart[0]) are tried first, then the entries
+// listed under the last byte of the word whose key matches the word's tail
+// (isRevSubset).  An entry is only handed to SfxEntry::check when the
+// compound-position, circumfix, onlyincompound and pseudoroot tests in the
+// large condition below all pass.
+//
+// sfxopts, ppfx, wlst, maxSug, ns and needflag are passed through to
+// SfxEntry::check; ppfx is additionally inspected here as the PfxEntry the
+// suffix is being crossed with (may be NULL).  cclass, when non-zero,
+// restricts the zero-length pass to entries that carry continuation flags.
+// in_compound is compared against IN_CPD_BEGIN and tested for non-zero.
+//
+// Returns the hentry of the first successful check, or NULL.
+// Side effects: sets the member sfx; the general case also sets sfxflag,
+// sfxappnd (only for entries without continuation flags) and, when cclass
+// is set or the entry has continuation flags, appends `word` to the
+// newline-separated member string `derived`.
 struct hentry * AffixMgr::suffix_check (const char * word, int len, 
-                       int sfxopts, AffEntry * ppfx)
+       int sfxopts, AffEntry * ppfx, char ** wlst, int maxSug, int * ns, 
+       const FLAG cclass, const FLAG needflag, char in_compound)
 {
     struct hentry * rv = NULL;
+
+    PfxEntry* ep = (PfxEntry *) ppfx;
 
     // first handle the special case of 0 length suffixes
     SfxEntry * se = (SfxEntry *) sStart[0];
+
     while (se) {
-       rv = se->check(word,len, sfxopts, ppfx);
-       if (rv) return rv;
+        if (!cclass || se->getCont()) {
+	    // suffixes are not allowed in beginning of compounds
+            if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+	     // except when signed with compoundpermitflag flag
+	     (se->getCont() && compoundpermitflag &&
+	        TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
+              // no circumfix flag in prefix and suffix
+              ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
+                   circumfix, ep->getContLen())) &&
+               (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
+              // circumfix flag in prefix AND suffix
+              ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
+                   circumfix, ep->getContLen())) &&
+               (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
+            // fogemorpheme
+              (in_compound || 
+                 !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
+	    // pseudoroot on prefix or first suffix
+	      (cclass || 
+                   !(se->getCont() && TESTAFF(se->getCont(), pseudoroot, se->getContLen())) ||
+                   (ppfx && !((ep->getCont()) &&
+                     TESTAFF(ep->getCont(), pseudoroot,
+                       ep->getContLen())))
+              )
+            ) &&
+	    (rv = se->check(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass, needflag))) {
+                sfx=(AffEntry *)se; // BUG: sfx not stateless
+                return rv;
+            }
+        }
        se = se->getNext();
     }
   
     // now handle the general case
     unsigned char sp = *((const unsigned char *)(word + len - 1));
+    SfxEntry * sptr = (SfxEntry *) sStart[sp];
+
+    while (sptr) {
+        if (isRevSubset(sptr->getKey(), word + len - 1, len)
+        ) {
+	    // suffixes are not allowed in beginning of compounds
+            if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+	     // except when signed with compoundpermitflag flag
+	     (sptr->getCont() && compoundpermitflag &&
+	        TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
+              // no circumfix flag in prefix and suffix
+              ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
+                   circumfix, ep->getContLen())) &&
+               (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
+              // circumfix flag in prefix AND suffix
+              ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
+                   circumfix, ep->getContLen())) &&
+               (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
+            // fogemorpheme
+              (in_compound || 
+                 !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
+	    // pseudoroot on prefix or first suffix
+	      (cclass || 
+                  !(sptr->getCont() && TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())) ||
+                  (ppfx && !((ep->getCont()) &&
+                     TESTAFF(ep->getCont(), pseudoroot,
+                       ep->getContLen())))
+              )
+            ) &&
+            (rv = sptr->check(word,len, sfxopts, ppfx, wlst, maxSug, ns, cclass, needflag))) {
+                    sfx=(AffEntry *)sptr; // BUG: sfx not stateless
+                    sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
+                    if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
+                    if (cclass || sptr->getCont()) {
+                        if (!derived) {
+                            derived = mystrdup(word);
+                        } else {
+                            // Append "\n" + word in an exactly sized heap
+                            // buffer; the previous fixed MAXLNLEN stack
+                            // buffer could overflow as `derived` grew.
+                            char * joined = (char *) malloc(strlen(derived) + strlen(word) + 2);
+                            if (joined) {
+                                strcpy(joined, derived);
+                                strcat(joined, "\n");
+                                strcat(joined, word);
+                                free(derived);
+                                derived = joined;
+                            }
+                            // on allocation failure the old list is kept
+                        }
+                    }
+                return rv;
+	    }
+             sptr = sptr->getNextEQ();
+        } else {
+	     sptr = sptr->getNextNE();
+        }
+    }
 
+    return NULL;
+}
 
+// check word for two-level suffixes
+//
+// Walks the same suffix tables as suffix_check (zero-length entries in
+// sStart[0], then the entries under the word's last byte), but only tries
+// entries whose flag is marked in contclasses[] and delegates to
+// SfxEntry::check_twosfx instead of SfxEntry::check.
+// sfxopts, ppfx and needflag are passed through unchanged.
+// Returns the first hentry found, or NULL.  In the general case a hit also
+// records sfxflag and, for entries without continuation flags, sfxappnd.
+
+struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len, 
+       int sfxopts, AffEntry * ppfx, const FLAG needflag)
+{
+    struct hentry * rv = NULL;
+
+    // first handle the special case of 0 length suffixes
+    SfxEntry * se = (SfxEntry *) sStart[0];
+    while (se) {
+        if (contclasses[se->getFlag()])
+        {
+            rv = se->check_twosfx(word,len, sfxopts, ppfx, needflag);
+            if (rv) return rv;
+        }
+        se = se->getNext();
+    }
+  
+    // now handle the general case
+    // (entries are indexed by the last byte of the word)
+    unsigned char sp = *((const unsigned char *)(word + len - 1));
     SfxEntry * sptr = (SfxEntry *) sStart[sp];
 
     while (sptr) {
-        if (isRevSubset(sptr->getKey(),(word+len-1), len)) {
-	     rv = sptr->check(word,len, sfxopts, ppfx);
-             if (rv) {
-                  return rv;
-             }
-             sptr = sptr->getNextEQ();
+        if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
+            if (contclasses[sptr->getFlag()])
+            {
+	        rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag);
+                if (rv) {
+            	    sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
+		    if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
+            	    return rv;
+                }
+            }
+            // key matched: continue along the "equal" link
+            sptr = sptr->getNextEQ();
         } else {
 	     sptr = sptr->getNextNE();
         }
     }
+
     return NULL;
 }
 
 
+// Collect morphological descriptions for the two-level suffix analyses of
+// `word` (length `len`).
+//
+// Visits the zero-length suffix entries and then the entries under the
+// word's last byte, considering only entries whose flag is marked in
+// contclasses[].  Each string returned by SfxEntry::check_twosfx_morph is
+// combined with the entry's own morph text (and, in the zero-length pass,
+// with the morph text of ppfx when given) and appended to the result as one
+// newline-terminated record.
+// sfxopts, ppfx and needflag are passed through to check_twosfx_morph.
+//
+// Returns a heap copy (mystrdup) of the collected text, or NULL when no
+// analysis was found.  The general pass also records sfxflag and, for
+// entries without continuation flags, sfxappnd.
+char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len, 
+       int sfxopts, AffEntry * ppfx, const FLAG needflag)
+{
+    char result[MAXLNLEN];
+    char result2[MAXLNLEN];
+    char result3[MAXLNLEN];
+
+    char * st;
+
+    result[0] = '\0';
+    result2[0] = '\0';
+    result3[0] = '\0';
+
+    // first handle the special case of 0 length suffixes
+    SfxEntry * se = (SfxEntry *) sStart[0];
+    while (se) {
+        if (contclasses[se->getFlag()])
+        {
+            st = se->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
+            if (st) {
+                if (ppfx) {
+                    if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
+                }
+                strcat(result, st);
+                free(st);
+                if (se->getMorph()) strcat(result, se->getMorph());
+                strcat(result, "\n");
+            }
+        }
+        se = se->getNext();
+    }
+
+    // now handle the general case
+    unsigned char sp = *((const unsigned char *)(word + len - 1));
+    SfxEntry * sptr = (SfxEntry *) sStart[sp];
+
+    while (sptr) {
+        if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
+            if (contclasses[sptr->getFlag()]) 
+            {
+                st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
+                if (st) {
+                    sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
+                    if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
+                    strcpy(result2, st);
+                    free(st);
+
+                result3[0] = '\0';
+                // NOTE(review): this DEBUG-only block is ill-formed as written
+                // (a char array initialised from a pointer, "%d" given
+                // getKey()); left untouched here, fix before enabling DEBUG.
+#ifdef DEBUG
+                unsigned short flag = sptr->getFlag();
+                char flagch[2] = &flag;
+                if (flag_mode == FLAG_NUM) {
+                    sprintf(result3, "%d", sptr->getKey());
+                } else if (flag_mode == FLAG_LONG) {
+                    sprintf(result3, "%c%c", flagch[0], flagch[1]);                
+                } else sprintf(result3, "%c", flagch[1]);                
+                strcat(result3, ":");
+#endif
+                if (sptr->getMorph()) strcat(result3, sptr->getMorph());
+                strlinecat(result2, result3);
+                strcat(result2, "\n");
+                strcat(result,  result2);
+                }
+            }
+            sptr = sptr->getNextEQ();
+        } else {
+            sptr = sptr->getNextNE();
+        }
+    }
+    // `result` is an array, so `if (result)` was always true and an empty
+    // string was returned instead of NULL; test its first character instead.
+    if (*result) return mystrdup(result);
+    return NULL;
+}
+
+// Collect morphological descriptions for the single-suffix analyses of
+// `word` (length `len`).
+//
+// Applies the same entry filter as suffix_check (compound position,
+// circumfix, onlyincompound, pseudoroot), but instead of stopping at the
+// first hit it walks every homonym returned by SfxEntry::check /
+// get_next_homonym and appends one newline-terminated record per homonym:
+// prefix morph (if ppfx), the entry's description and, unless flagged
+// lemma_present, its word, then the suffix morph.
+// Returns a heap copy (mystrdup) of the collected text, or NULL if empty.
+char * AffixMgr::suffix_check_morph(const char * word, int len, 
+       int sfxopts, AffEntry * ppfx, const FLAG cclass, const FLAG needflag, char in_compound)
+{
+    char result[MAXLNLEN];
+    
+    struct hentry * rv = NULL;
+
+    result[0] = '\0';
+
+    PfxEntry* ep = (PfxEntry *) ppfx;
+
+    // first handle the special case of 0 length suffixes
+    SfxEntry * se = (SfxEntry *) sStart[0];
+    while (se) {
+        if (!cclass || se->getCont()) {
+	    // suffixes are not allowed in beginning of compounds
+            if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+	     // except when signed with compoundpermitflag flag
+	     (se->getCont() && compoundpermitflag &&
+	        TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
+              // no circumfix flag in prefix and suffix
+              ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
+                   circumfix, ep->getContLen())) &&
+               (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
+              // circumfix flag in prefix AND suffix
+              ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
+                   circumfix, ep->getContLen())) &&
+               (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
+            // fogemorpheme
+              (in_compound || 
+                 !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
+	    // pseudoroot on prefix or first suffix
+	      (cclass || 
+                   !(se->getCont() && TESTAFF(se->getCont(), pseudoroot, se->getContLen())) ||
+                   (ppfx && !((ep->getCont()) &&
+                     TESTAFF(ep->getCont(), pseudoroot,
+                       ep->getContLen())))
+              )
+            ))
+            rv = se->check(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
+         // NOTE: this loop is not nested under the `if` above.  rv is NULL
+         // here unless the check just succeeded, because the loop below only
+         // ever exits once rv has become NULL.
+         while (rv) {
+           if (ppfx) {
+                if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
+            }
+            if (complexprefixes && rv->description) strcat(result, rv->description);
+            if (rv->description && ((!rv->astr) || 
+		   			!TESTAFF(rv->astr, lemma_present, rv->alen)))
+					       strcat(result, rv->word);
+            if (!complexprefixes && rv->description) strcat(result, rv->description);
+            if (se->getMorph()) strcat(result, se->getMorph());
+            strcat(result, "\n");
+            rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
+         }
+       }
+       se = se->getNext();
+    }
+  
+    // now handle the general case
+    unsigned char sp = *((const unsigned char *)(word + len - 1));
+    SfxEntry * sptr = (SfxEntry *) sStart[sp];
+
+    while (sptr) {
+        if (isRevSubset(sptr->getKey(), word + len - 1, len)
+        ) {
+	    // suffixes are not allowed in beginning of compounds
+            if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+	     // except when signed with compoundpermitflag flag
+	     (sptr->getCont() && compoundpermitflag &&
+	        TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
+              // no circumfix flag in prefix and suffix
+              ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
+                   circumfix, ep->getContLen())) &&
+               (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
+              // circumfix flag in prefix AND suffix
+              ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
+                   circumfix, ep->getContLen())) &&
+               (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
+            // fogemorpheme
+              (in_compound || 
+                 !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
+	    // pseudoroot on first suffix
+	    // NOTE(review): unlike the zero-length pass above, this test has
+	    // no clause for a pseudoroot flag on ppfx - confirm this is intended.
+	      (cclass || !(sptr->getCont() && 
+	           TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())))
+            )) rv = sptr->check(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
+            // as above: runs only when rv was just set by the check
+            while (rv) {
+                    if (ppfx) {
+                        if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
+                    }    
+                    if (complexprefixes && rv->description) strcat(result, rv->description);
+                    if (rv->description && ((!rv->astr) || 
+                        !TESTAFF(rv->astr, lemma_present, rv->alen))) strcat(result, rv->word);
+                    if (!complexprefixes && rv->description) strcat(result, rv->description);
+                // NOTE(review): the DEBUG-only block below uses result2, which
+                // is not declared in this function, and takes the flag from
+                // getKey(); it needs fixing before DEBUG can be enabled.
+#ifdef DEBUG
+                unsigned short flag = sptr->getKey();
+                char flagch[2] = &flag;
+                if (flag_mode == FLAG_NUM) {
+                    sprintf(result2, "%d", sptr->getKey());
+                } else if (flag_mode == FLAG_LONG) {
+                    sprintf(result2, "%c%c", flagch[0], flagch[1]);                
+                } else sprintf(result2, "%c", flagch[1]);                
+                strcat(result2, ":");
+                strcat(result, result2);
+#endif
+
+                if (sptr->getMorph()) strcat(result, sptr->getMorph());
+                strcat(result, "\n");
+                rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
+	    }
+             sptr = sptr->getNextEQ();
+        } else {
+	     sptr = sptr->getNextNE();
+        }
+    }
+
+    if (*result) return mystrdup(result);
+    return NULL;
+}
+
 
 // check if word with affixes is correctly spelled
+//
+// Clears the member string `derived`, then tries in order: prefix_check,
+// suffix_check and - only when havecontclass is set - suffix_check_twosfx
+// and prefix_check_twosfx.  needflag and in_compound are passed to the
+// one-level checks; the two-level checks get needflag only
+// (prefix_check_twosfx is always called with IN_CPD_NOT).
+// Returns the first hentry found, or NULL.
-struct hentry * AffixMgr::affix_check (const char * word, int len)
+struct hentry * AffixMgr::affix_check (const char * word, int len, const FLAG needflag, char in_compound)
 {
     struct hentry * rv= NULL;
+    if (derived) free(derived);
+    derived =  NULL;
 
     // check all prefixes (also crossed with suffixes if allowed) 
-    rv = prefix_check(word, len);
+    rv = prefix_check(word, len, in_compound, needflag);
     if (rv) return rv;
 
     // if still not found check all suffixes
-    rv = suffix_check(word, len, 0, NULL);
+    rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, in_compound);
+
+    if (havecontclass) {
+        // NOTE: sfx/pfx are reset even when suffix_check just found a match
+        sfx = NULL;
+        pfx = NULL;
+        if (rv) return rv;
+        // if still not found check all two-level suffixes
+        rv = suffix_check_twosfx(word, len, 0, NULL, needflag);
+        if (rv) return rv;
+        // if still not found try prefix_check_twosfx (never in compound mode)
+        rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
+    }
     return rv;
 }
 
 
-int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, 
-                       const char * ts, int wl, const char * ap, int al)
+// Collect the morphological descriptions of every affixed analysis of
+// `word` (length `len`).
+//
+// Unlike affix_check, nothing short-circuits: the outputs of
+// prefix_check_morph, suffix_check_morph and - when havecontclass is set -
+// suffix_check_twosfx_morph and prefix_check_twosfx_morph are concatenated
+// in that order.  needflag and in_compound are passed through to the
+// helpers as shown below.
+//
+// Returns a heap copy (mystrdup) of the combined text; the text is empty
+// when no helper produced anything.  Each helper may return up to a full
+// line buffer, so every append is limited to the space left in `result`
+// (text beyond MAXLNLEN - 1 characters is truncated instead of overflowing
+// the stack buffer).
+char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needflag, char in_compound)
+{
+    char result[MAXLNLEN];
+    char * st = NULL;
+
+    *result = '\0';
+
+    // prefixes (also crossed with suffixes if allowed)
+    st = prefix_check_morph(word, len, in_compound);
+    if (st) {
+        strncat(result, st, MAXLNLEN - 1 - strlen(result));
+        free(st);
+    }
+
+    // one-level suffixes
+    st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound);
+    if (st) {
+        strncat(result, st, MAXLNLEN - 1 - strlen(result));
+        free(st);
+    }
+
+    if (havecontclass) {
+        sfx = NULL;
+        pfx = NULL;
+        // two-level suffixes
+        st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag);
+        if (st) {
+            strncat(result, st, MAXLNLEN - 1 - strlen(result));
+            free(st);
+        }
+
+        // prefix_check_twosfx_morph, never in compound mode
+        st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag);
+        if (st) {
+            strncat(result, st, MAXLNLEN - 1 - strlen(result));
+            free(st);
+        }
+    }
+
+    return mystrdup(result);
+}
+
+
+// Expand a root word with the affixes its flags allow, writing candidates
+// into wlst (at most maxn entries; surplus words are freed).
+//
+//   ts, wl    root word and its length
+//   ap, al    the root's affix flags and their count
+//   bad, badl the misspelled word and its length; an affix with a non-empty
+//             key is only applied when that key matches the end (suffixes)
+//             or the start (prefixes) of `bad`
+//
+// The root itself and every affix entry are skipped when flagged pseudoroot
+// or onlyincompound.  Order: root, suffixed forms, prefix x suffix cross
+// products for entries marked `allow`, then purely prefixed forms.
+// NOTE(review): the return statement lies outside the visible hunks;
+// presumably the number of entries written (nh) - confirm.
+int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
+    int wl, const unsigned short * ap, unsigned short al, char * bad, int badl)
 {
 
     int nh=0;
 
     // first add root word to list
-
-    if (nh < maxn) {
+    if ((nh < maxn) && !(al && ((pseudoroot && TESTAFF(ap, pseudoroot, al)) ||
+         (onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
        wlst[nh].word = mystrdup(ts);
        wlst[nh].allow = (1 == 0);
        nh++;
@@ -756,19 +2649,28 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
 
     // handle suffixes
     for (int i = 0; i < al; i++) {
-       unsigned char c = (unsigned char) ap[i];
+       unsigned short c = (unsigned short) ap[i];
        SfxEntry * sptr = (SfxEntry *)sFlag[c];
        while (sptr) {
-	 char * newword = sptr->add(ts, wl);
-         if (newword) {
-           if (nh < maxn) {
-	      wlst[nh].word = newword;
-              wlst[nh].allow = sptr->allowCross();
-              nh++;
-	   } else {
-	      free(newword);
-           }
-	 }
+         if ((!sptr->getKeyLen() || ((badl > sptr->getKeyLen()) &&
+                (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
+                // also for zero-length keys: skip pseudoroot/onlyincompound affixes
+                !(sptr->getCont() && ((pseudoroot && 
+                      TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())) ||
+                  (onlyincompound && 
+                      TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))
+                ) {
+	    char * newword = sptr->add(ts, wl);
+            if (newword) {
+                if (nh < maxn) {
+	            wlst[nh].word = newword;
+                    wlst[nh].allow = sptr->allowCross();
+                    nh++;
+	        } else {
+	            free(newword);
+                }
+	    }
+         }
          sptr = (SfxEntry *)sptr ->getFlgNxt();
        }
     }
@@ -779,10 +2681,11 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
     for (int j=1;j<n ;j++)
        if (wlst[j].allow) {
           for (int k = 0; k < al; k++) {
-             unsigned char c = (unsigned char) ap[k];
+             unsigned short c = (unsigned short) ap[k];
              PfxEntry * cptr = (PfxEntry *) pFlag[c];
              while (cptr) {
-                if (cptr->allowCross()) {
+                if (cptr->allowCross() && (!cptr->getKeyLen() || ((badl > cptr->getKeyLen()) &&
+                        (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
 	            int l1 = strlen(wlst[j].word);
 	            char * newword = cptr->add(wlst[j].word, l1);
                     if (newword) {
@@ -803,19 +2706,28 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
 
     // now handle pure prefixes
     for (int m = 0; m < al; m ++) {
-       unsigned char c = (unsigned char) ap[m];
+       unsigned short c = (unsigned short) ap[m];
        PfxEntry * ptr = (PfxEntry *) pFlag[c];
        while (ptr) {
-	 char * newword = ptr->add(ts, wl);
-         if (newword) {
-	     if (nh < maxn) {
-	        wlst[nh].word = newword;
-                wlst[nh].allow = ptr->allowCross();
-                nh++;
-             } else {
-	        free(newword);
-	     } 
-	 }
+         if ((!ptr->getKeyLen() || ((badl > ptr->getKeyLen()) &&
+                (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
+                // also for zero-length keys: skip pseudoroot/onlyincompound affixes
+                !(ptr->getCont() && ((pseudoroot && 
+                      TESTAFF(ptr->getCont(), pseudoroot, ptr->getContLen())) ||
+                  (onlyincompound && 
+                      TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))
+                ) {
+	    char * newword = ptr->add(ts, wl);
+            if (newword) {
+	        if (nh < maxn) {
+	            wlst[nh].word = newword;
+                    wlst[nh].allow = ptr->allowCross();
+                    nh++;
+                } else {
+	            free(newword);
+	        } 
+	    }
+         }
          ptr = (PfxEntry *)ptr ->getFlgNxt();
        }
     }
@@ -824,6 +2736,7 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn,
 }
 
 
+
 // return length of replacing table
 int AffixMgr::get_numrep()
 {
@@ -837,7 +2750,6 @@ struct replentry * AffixMgr::get_reptable()
   return reptable;
 }
 
-
 // return length of character map table
 int AffixMgr::get_nummap()
 {
@@ -851,6 +2763,19 @@ struct mapentry * AffixMgr::get_maptable()
   return maptable;
 }
 
+// return length of word break table
+int AffixMgr::get_numbreak()
+{
+  return numbreak;
+}
+
+// return word break table
+char ** AffixMgr::get_breaktable()
+{
+  if (! breaktable ) return NULL;
+  return breaktable;
+}
+
 // return text encoding of dictionary
 char * AffixMgr::get_encoding()
 {
@@ -860,6 +2785,33 @@ char * AffixMgr::get_encoding()
   return mystrdup(encoding);
 }
 
+// return language number of dictionary
+int AffixMgr::get_langnum()
+{
+  return langnum;
+}
+
+// return UTF info table
+struct unicode_info2 * AffixMgr::get_utf_conv()
+{
+  return utf_tbl;
+}
+
+// return double prefix option
+int AffixMgr::get_complexprefixes()
+{
+  return complexprefixes;
+}
+
+FLAG AffixMgr::get_keepcase()
+{
+  return keepcase;
+}
+
+int AffixMgr::get_checksharps()
+{
+  return checksharps;
+}
 
 // return the preferred try string for suggestions
 char * AffixMgr::get_try_string()
@@ -868,11 +2820,101 @@ char * AffixMgr::get_try_string()
   return mystrdup(trystring);
 }
 
+// return the extra word characters (WORDCHARS)
+const char * AffixMgr::get_wordchars()
+{
+  return wordchars;
+}
+
+unsigned short * AffixMgr::get_wordchars_utf16(int * len)
+{
+  *len = wordchars_utf16_len;
+  return wordchars_utf16;
+}
+
+// is there compounding?
+int AffixMgr::get_compound()
+{
+  return compoundflag || compoundbegin || numdefcpd;
+}
+
 // return the compound words control flag
-char * AffixMgr::get_compound()
+FLAG AffixMgr::get_compoundflag()
+{
+  return compoundflag;
+}
+
+// return the forbidden words control flag
+FLAG AffixMgr::get_forbiddenword()
+{
+  return forbiddenword;
+}
+
+// return the nosuggest flag
+FLAG AffixMgr::get_nosuggest()
+{
+  return nosuggest;
+}
+
+// return the pseudoroot flag
+FLAG AffixMgr::get_pseudoroot()
+{
+  return pseudoroot;
+}
+
+// return the onlyincompound flag
+FLAG AffixMgr::get_onlyincompound()
+{
+  return onlyincompound;
+}
+
+// return the compound word signal flag
+FLAG AffixMgr::get_compoundroot()
 {
-  if (! compound ) return NULL;
-  return compound;
+  return compoundroot;
+}
+
+// return the compound begin signal flag
+FLAG AffixMgr::get_compoundbegin()
+{
+  return compoundbegin;
+}
+
+// return the value of checknum
+int AffixMgr::get_checknum()
+{
+  return checknum;
+}
+
+// return the value of prefix
+const char * AffixMgr::get_prefix()
+{
+  if (pfx) return ((PfxEntry *)pfx)->getKey();
+  return NULL;
+}
+
+// return the value of suffix
+const char * AffixMgr::get_suffix()
+{
+  return sfxappnd;
+}
+
+// return the value of derived form (base word with first suffix).
+const char * AffixMgr::get_derived()
+{
+  return derived;
+}
+
+// return the version string
+const char * AffixMgr::get_version()
+{
+  return version;
+}
+
+// return lemma_present flag
+FLAG AffixMgr::get_lemma_present()
+{
+  return lemma_present;
 }
 
 // utility method to look up root words in hash table
@@ -882,12 +2924,36 @@ struct hentry * AffixMgr::lookup(const char * word)
   return pHMgr->lookup(word);
 }
 
+// return the havecontclass flag (set when an affix has continuation flags)
+const int AffixMgr::have_contclass()
+{
+  return havecontclass;
+}
+
+// return utf8
+int AffixMgr::get_utf8()
+{
+  return utf8;
+}
+
 // return nosplitsugs
-bool AffixMgr::get_nosplitsugs(void)
+int AffixMgr::get_maxngramsugs(void)
+{
+  return maxngramsugs;
+}
+
+// return nosplitsugs
+int AffixMgr::get_nosplitsugs(void)
 {
   return nosplitsugs;
 }
 
+// return sugswithdots
+int AffixMgr::get_sugswithdots(void)
+{
+  return sugswithdots;
+}
+
 /* parse in the try string */
 int  AffixMgr::parse_try(char * line)
 {
@@ -899,7 +2965,7 @@ int  AffixMgr::parse_try(char * line)
    char * piece;
    int i = 0;
    int np = 0;
-   while ((piece=mystrsep(&tp,' '))) {
+   while ((piece=mystrsep(&tp, 0))) {
       if (*piece != '\0') {
           switch(i) {
 	      case 0: { np++; break; }
@@ -929,11 +2995,32 @@ int  AffixMgr::parse_set(char * line)
    char * piece;
    int i = 0;
    int np = 0;
-   while ((piece=mystrsep(&tp,' '))) {
+   while ((piece=mystrsep(&tp, 0))) {
       if (*piece != '\0') {
           switch(i) {
 	     case 0: { np++; break; }
-             case 1: { encoding = mystrdup(piece); np++; break; }
+             case 1: { encoding = mystrdup(piece); 
+                    if (strcmp(encoding, "UTF-8") == 0) {
+                        unicode_info * uni = get_utf_cs();
+                        utf8 = 1;
+                        utf_tbl = (unicode_info2 *) malloc(CONTSIZE * sizeof(unicode_info2));
+                        if (utf_tbl) {
+                            int j;
+                            for (j = 0; j < CONTSIZE; j++) {
+                                utf_tbl[j].cletter = 0;
+                                utf_tbl[j].clower = j;
+                                utf_tbl[j].cupper = j;
+                            }
+                            for (j = 0; j < get_utf_cs_len(); j++) {
+                                utf_tbl[uni[j].c].cletter = 1;
+                                utf_tbl[uni[j].c].clower = uni[j].clower;
+                                utf_tbl[uni[j].c].cupper = uni[j].cupper;
+                            }
+                            // set Azeri, Turkish spec. lowercasing
+                            set_spec_utf8_encoding();
+                        } else return 1;
+                    }
+                    np++; break; }
 	     default: break;
           }
           i++;
@@ -947,49 +3034,169 @@ int  AffixMgr::parse_set(char * line)
    return 0;
 }
 
+/* parse flag */
+int AffixMgr::parse_flag(char * line, unsigned short * out, char * name)
+{
+   if (*out) {
+      fprintf(stderr,"error: duplicate %s strings\n", name);
+      return 1;
+   }
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+      if (*piece != '\0') {
+          switch(i) {
+	      case 0: { np++; break; }
+              case 1: { 
+                *out = pHMgr->decode_flag(piece);
+                np++;
+                break;
+              }
+	      default: break;
+          }
+          i++;
+      }
+      free(piece);
+   }
+   if (np != 2) {
+      fprintf(stderr,"error: missing %s information\n", name);
+      return 1;
+   } 
+   return 0;
+}
+
+/* parse number */
+int AffixMgr::parse_num(char * line, int * out, char * name)
+{
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+      if (*piece != '\0') {
+          switch(i) {
+	      case 0: { np++; break; }
+              case 1: { 
+                *out = atoi(piece);
+                np++;
+                break;
+              }
+	      default: break;
+          }
+          i++;
+      }
+      free(piece);
+   }
+   if (np != 2) {
+      fprintf(stderr,"error: missing %s information\n", name);
+      return 1;
+   } 
+   return 0;
+}
 
-/* parse in the flag used by the controlled compound words */
-int  AffixMgr::parse_cpdflag(char * line)
+/* parse in the wordchars string */
+int  AffixMgr::parse_wordchars(char * line)
 {
-   if (compound) {
-      fprintf(stderr,"error: duplicate compound flags used\n");
+   if (wordchars) {
+      fprintf(stderr,"error: duplicate WORDCHARS strings\n");
       return 1;
    }
    char * tp = line;
    char * piece;
    int i = 0;
    int np = 0;
-   while ((piece=mystrsep(&tp,' '))) {
+   w_char w[MAXWORDLEN];
+   while ((piece=mystrsep(&tp, 0))) {
+      if (*piece != '\0') {
+          switch(i) {
+	      case 0: { np++; break; }
+              case 1: {
+                if (!utf8) {
+                    wordchars = mystrdup(piece);
+                } else {
+                    int n = u8_u16(w, MAXWORDLEN, piece);
+                    if (n > 0) {
+                        flag_qsort((unsigned short *) w, 0, n);
+                        wordchars_utf16 = (unsigned short *) malloc(n * sizeof(unsigned short));
+                        if (!wordchars_utf16) return 1;
+                        memcpy(wordchars_utf16, w, n * sizeof(unsigned short));
+                    }
+                    wordchars_utf16_len = n;
+                }
+                np++;
+                break;
+              }
+	      default: break;
+          }
+          i++;
+      }
+      free(piece);
+   }
+   if (np != 2) {
+      fprintf(stderr,"error: missing WORDCHARS information\n");
+      return 1;
+   } 
+   return 0;
+}
+
+
+/* parse in the max syllable count of compound words and the vowel set */
+int  AffixMgr::parse_cpdsyllable(char * line)
+{
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   w_char w[MAXWORDLEN];
+   while ((piece=mystrsep(&tp, 0))) {
       if (*piece != '\0') {
           switch(i) {
 	     case 0: { np++; break; }
-             case 1: { compound = mystrdup(piece); np++; break; }
+             case 1: { cpdmaxsyllable = atoi(piece); np++; break; }
+             case 2: {
+                if (!utf8) {
+                    cpdvowels = mystrdup(piece);
+                } else {
+                    int n = u8_u16(w, MAXWORDLEN, piece);
+                    if (n > 0) {
+                        flag_qsort((unsigned short *) w, 0, n);
+                        cpdvowels_utf16 = (w_char *) malloc(n * sizeof(w_char));
+                        if (!cpdvowels_utf16) return 1;
+                        memcpy(cpdvowels_utf16, w, n * sizeof(w_char));
+                    }
+                    cpdvowels_utf16_len = n;
+                }
+                np++;
+                break;
+             }
 	     default: break;
           }
           i++;
       }
       free(piece);
    }
-   if (np != 2) {
-      fprintf(stderr,"error: missing compound flag information\n");
+   if (np < 2) {
+      fprintf(stderr,"error: missing compoundsyllable information\n");
       return 1;
    }
+   if (np == 2) cpdvowels = mystrdup("aeiouAEIOU");
    return 0;
 }
 
-
-/* parse in the min compound word length */
-int  AffixMgr::parse_cpdmin(char * line)
+/* parse in the flags that increment the syllable number */
+int  AffixMgr::parse_syllablenum(char * line)
 {
    char * tp = line;
    char * piece;
    int i = 0;
    int np = 0;
-   while ((piece=mystrsep(&tp,' '))) {
+   while ((piece=mystrsep(&tp, 0))) {
       if (*piece != '\0') {
           switch(i) {
 	     case 0: { np++; break; }
-             case 1: { cpdmin = atoi(piece); np++; break; }
+             case 1: { cpdsyllablenum = mystrdup(piece); np++; break; }
 	     default: break;
           }
           i++;
@@ -997,14 +3204,12 @@ int  AffixMgr::parse_cpdmin(char * line)
       free(piece);
    }
    if (np != 2) {
-      fprintf(stderr,"error: missing compound min information\n");
+      fprintf(stderr,"error: missing cpdsyllablenum information\n");
       return 1;
-   } 
-   if ((cpdmin < 1) || (cpdmin > 50)) cpdmin = 3;
+   }
    return 0;
 }
 
-
 /* parse in the typical fault correcting table */
 int  AffixMgr::parse_reptable(char * line, FILE * af)
 {
@@ -1016,7 +3221,7 @@ int  AffixMgr::parse_reptable(char * line, FILE * af)
    char * piece;
    int i = 0;
    int np = 0;
-   while ((piece=mystrsep(&tp,' '))) {
+   while ((piece=mystrsep(&tp, 0))) {
        if (*piece != '\0') {
           switch(i) {
 	     case 0: { np++; break; }
@@ -1028,6 +3233,7 @@ int  AffixMgr::parse_reptable(char * line, FILE * af)
                           return 1;
                        }
                        reptable = (replentry *) malloc(numrep * sizeof(struct replentry));
+                       if (!reptable) return 1;
                        np++;
                        break;
 	             }
@@ -1045,13 +3251,13 @@ int  AffixMgr::parse_reptable(char * line, FILE * af)
    /* now parse the numrep lines to read in the remainder of the table */
    char * nl = line;
    for (int j=0; j < numrep; j++) {
-        fgets(nl,MAXLNLEN,af);
+        if (!fgets(nl,MAXLNLEN,af)) return 1;
         mychomp(nl);
         tp = nl;
         i = 0;
         reptable[j].pattern = NULL;
-        reptable[j].replacement = NULL;
-        while ((piece=mystrsep(&tp,' '))) {
+        reptable[j].pattern2 = NULL;
+        while ((piece=mystrsep(&tp, 0))) {
            if (*piece != '\0') {
                switch(i) {
                   case 0: {
@@ -1063,14 +3269,14 @@ int  AffixMgr::parse_reptable(char * line, FILE * af)
                              break;
 		          }
                   case 1: { reptable[j].pattern = mystrdup(piece); break; }
-                  case 2: { reptable[j].replacement = mystrdup(piece); break; }
+                  case 2: { reptable[j].pattern2 = mystrdup(piece); break; }
 		  default: break;
                }
                i++;
            }
            free(piece);
         }
-	if ((!(reptable[j].pattern)) || (!(reptable[j].replacement))) {
+	if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
 	     fprintf(stderr,"error: replacement table is corrupt\n");
              return 1;
         }
@@ -1078,6 +3284,155 @@ int  AffixMgr::parse_reptable(char * line, FILE * af)
    return 0;
 }
 
+/* parse in the checkcompoundpattern table */
+int  AffixMgr::parse_checkcpdtable(char * line, FILE * af)
+{
+   if (numcheckcpd != 0) {
+      fprintf(stderr,"error: duplicate compound pattern tables used\n");
+      return 1;
+   }
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+       if (*piece != '\0') {
+          switch(i) {
+	     case 0: { np++; break; }
+             case 1: { 
+                       numcheckcpd = atoi(piece);
+	               if (numcheckcpd < 1) {
+			  fprintf(stderr,"incorrect number of entries in compound pattern table\n");
+			  free(piece);
+                          return 1;
+                       }
+                       checkcpdtable = (replentry *) malloc(numcheckcpd * sizeof(struct replentry));
+                       if (!checkcpdtable) return 1;
+                       np++;
+                       break;
+	             }
+	     default: break;
+          }
+          i++;
+       }
+       free(piece);
+   }
+   if (np != 2) {
+      fprintf(stderr,"error: missing compound pattern table information\n");
+      return 1;
+   } 
+ 
+   /* now parse the numcheckcpd lines to read in the remainder of the table */
+   char * nl = line;
+   for (int j=0; j < numcheckcpd; j++) {
+        if (!fgets(nl,MAXLNLEN,af)) return 1;
+        mychomp(nl);
+        tp = nl;
+        i = 0;
+        checkcpdtable[j].pattern = NULL;
+        checkcpdtable[j].pattern2 = NULL;
+        while ((piece=mystrsep(&tp, 0))) {
+           if (*piece != '\0') {
+               switch(i) {
+                  case 0: {
+		             if (strncmp(piece,"CHECKCOMPOUNDPATTERN",20) != 0) {
+		                 fprintf(stderr,"error: compound pattern table is corrupt\n");
+                                 free(piece);
+                                 return 1;
+                             }
+                             break;
+		          }
+                  case 1: { checkcpdtable[j].pattern = mystrdup(piece); break; }
+                  case 2: { checkcpdtable[j].pattern2 = mystrdup(piece); break; }
+		  default: break;
+               }
+               i++;
+           }
+           free(piece);
+        }
+	if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) {
+	     fprintf(stderr,"error: compound pattern table is corrupt\n");
+             return 1;
+        }
+   }
+   return 0;
+}
+
+/* parse in the compound rule table */
+int  AffixMgr::parse_defcpdtable(char * line, FILE * af)
+{
+   if (numdefcpd != 0) {
+      fprintf(stderr,"error: duplicate compound rule tables used\n");
+      return 1;
+   }
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+       if (*piece != '\0') {
+          switch(i) {
+	     case 0: { np++; break; }
+             case 1: { 
+                       numdefcpd = atoi(piece);
+	               if (numdefcpd < 1) {
+			  fprintf(stderr,"incorrect number of entries in compound rule table\n");
+			  free(piece);
+                          return 1;
+                       }
+                       defcpdtable = (flagentry *) malloc(numdefcpd * sizeof(flagentry));
+                       if (!defcpdtable) return 1;
+                       np++;
+                       break;
+	             }
+	     default: break;
+          }
+          i++;
+       }
+       free(piece);
+   }
+   if (np != 2) {
+      fprintf(stderr,"error: missing compound rule table information\n");
+      return 1;
+   } 
+ 
+   /* now parse the numdefcpd lines to read in the remainder of the table */
+   char * nl = line;
+   for (int j=0; j < numdefcpd; j++) {
+        if (!fgets(nl,MAXLNLEN,af)) return 1;
+        mychomp(nl);
+        tp = nl;
+        i = 0;
+        defcpdtable[j].def = NULL;
+        while ((piece=mystrsep(&tp, 0))) {
+           if (*piece != '\0') {
+               switch(i) {
+                  case 0: {
+		             if (strncmp(piece, "COMPOUNDRULE", 12) != 0) {
+		                 fprintf(stderr,"error: compound rule table is corrupt\n");
+                                 free(piece);
+                                 return 1;
+                             }
+                             break;
+		          }
+                  case 1: { 
+                            defcpdtable[j].len = 
+                                pHMgr->decode_flags(&(defcpdtable[j].def), piece);
+                            break; 
+                           }
+		  default: break;
+               }
+               i++;
+           }
+           free(piece);
+        }
+	if (!defcpdtable[j].len) {
+	     fprintf(stderr,"error: compound rule table is corrupt\n");
+             return 1;
+        }
+   }
+   return 0;
+}
 
 
 /* parse in the character map table */
@@ -1091,7 +3446,7 @@ int  AffixMgr::parse_maptable(char * line, FILE * af)
    char * piece;
    int i = 0;
    int np = 0;
-   while ((piece=mystrsep(&tp,' '))) {
+   while ((piece=mystrsep(&tp, 0))) {
        if (*piece != '\0') {
           switch(i) {
 	     case 0: { np++; break; }
@@ -1103,6 +3458,7 @@ int  AffixMgr::parse_maptable(char * line, FILE * af)
                           return 1;
                        }
                        maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry));
+                       if (!maptable) return 1;
                        np++;
                        break;
 	             }
@@ -1120,13 +3476,13 @@ int  AffixMgr::parse_maptable(char * line, FILE * af)
    /* now parse the nummap lines to read in the remainder of the table */
    char * nl = line;
    for (int j=0; j < nummap; j++) {
-        fgets(nl,MAXLNLEN,af);
+        if (!fgets(nl,MAXLNLEN,af)) return 1;
         mychomp(nl);
         tp = nl;
         i = 0;
         maptable[j].set = NULL;
         maptable[j].len = 0;
-        while ((piece=mystrsep(&tp,' '))) {
+        while ((piece=mystrsep(&tp, 0))) {
            if (*piece != '\0') {
                switch(i) {
                   case 0: {
@@ -1137,8 +3493,24 @@ int  AffixMgr::parse_maptable(char * line, FILE * af)
                              }
                              break;
 		          }
-                  case 1: { maptable[j].set = mystrdup(piece); 
-		            maptable[j].len = strlen(maptable[j].set);
+                  case 1: {
+                            maptable[j].len = 0;
+                            maptable[j].set = NULL;
+                            maptable[j].set_utf16 = NULL;
+                            if (!utf8) {
+                                maptable[j].set = mystrdup(piece); 
+                                maptable[j].len = strlen(maptable[j].set);
+                            } else {
+                                w_char w[MAXWORDLEN];
+                                int n = u8_u16(w, MAXWORDLEN, piece);
+                                if (n > 0) {
+                                    flag_qsort((unsigned short *) w, 0, n);
+                                    maptable[j].set_utf16 = (w_char *) malloc(n * sizeof(w_char));
+                                    if (!maptable[j].set_utf16) return 1;
+                                    memcpy(maptable[j].set_utf16, w, n * sizeof(w_char));
+                                }
+                                maptable[j].len = n;
+                            }
                             break; }
 		  default: break;
                }
@@ -1146,7 +3518,7 @@ int  AffixMgr::parse_maptable(char * line, FILE * af)
            }
            free(piece);
         }
-	if ((!(maptable[j].set)) || (!(maptable[j].len))) {
+	if ((!(maptable[j].set || maptable[j].set_utf16)) || (!(maptable[j].len))) {
 	     fprintf(stderr,"error: map table is corrupt\n");
              return 1;
         }
@@ -1154,13 +3526,134 @@ int  AffixMgr::parse_maptable(char * line, FILE * af)
    return 0;
 }
 
+/* parse in the word breakpoint table */
+int  AffixMgr::parse_breaktable(char * line, FILE * af)
+{
+   if (numbreak != 0) {
+      fprintf(stderr,"error: duplicate word breakpoint tables used\n");
+      return 1;
+   }
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+       if (*piece != '\0') {
+          switch(i) {
+	     case 0: { np++; break; }
+             case 1: { 
+                       numbreak = atoi(piece);
+	               if (numbreak < 1) {
+			  fprintf(stderr,"incorrect number of entries in BREAK table\n");
+			  free(piece);
+                          return 1;
+                       }
+                       breaktable = (char **) malloc(numbreak * sizeof(char *));
+                       if (!breaktable) return 1;
+                       np++;
+                       break;
+	             }
+	     default: break;
+          }
+          i++;
+       }
+       free(piece);
+   }
+   if (np != 2) {
+      fprintf(stderr,"error: missing word breakpoint table information\n");
+      return 1;
+   } 
+ 
+   /* now parse the numbreak lines to read in the remainder of the table */
+   char * nl = line;
+   for (int j=0; j < numbreak; j++) {
+        if (!fgets(nl,MAXLNLEN,af)) return 1;
+        mychomp(nl);
+        tp = nl;
+        i = 0;
+        while ((piece=mystrsep(&tp, 0))) {
+           if (*piece != '\0') {
+               switch(i) {
+                  case 0: {
+		             if (strncmp(piece,"BREAK",5) != 0) {
+		                 fprintf(stderr,"error: BREAK table is corrupt\n");
+                                 free(piece);
+                                 return 1;
+                             }
+                             break;
+		          }
+                  case 1: {
+                            breaktable[j] = mystrdup(piece);
+                            break;
+                          }
+		  default: break;
+               }
+               i++;
+           }
+           free(piece);
+        }
+	if (!breaktable) {
+	     fprintf(stderr,"error: BREAK table is corrupt\n");
+             return 1;
+        }
+   }
+   return 0;
+}
 
+/* parse in the LANG setting (language name and language number) */
+int  AffixMgr::parse_lang(char * line)
+{
+   if (lang != NULL) {
+      fprintf(stderr,"error: duplicate LANG used\n");
+      return 1;
+   }
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+      if (*piece != '\0') {
+          switch(i) {
+	     case 0: { np++; break; }
+             case 1: { 
+                    lang = mystrdup(piece);
+                    langnum = get_lang_num(piece);
+                    set_spec_utf8_encoding();
+                    np++; break; 
+                }
+	     default: break;
+          }
+          i++;
+      }
+      free(piece);
+   }
+   if (np < 2) {
+      fprintf(stderr,"error: missing LANG information\n");
+      return 1;
+   }
+   return 0;
+}
 
+/* parse in the version string */
+int  AffixMgr::parse_version(char * line)
+{
+   if (version) {
+      fprintf(stderr,"error: duplicate VERSION strings\n");
+      return 1;
+   }
+   char * tp = line;
+   char * piece = mystrsep(&tp, 0);
+   version = mystrdup(tp);
+   free(piece);
+   return 0;
+}
 
-int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
+int  AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflags)
 {
    int numents = 0;      // number of affentry structures to parse
-   char achar='\0';      // affix char identifier
+
+   unsigned short aflag = 0;      // affix char identifier
+
    short ff=0;
    struct affentry * ptr= NULL;
    struct affentry * nptr= NULL;
@@ -1170,29 +3663,51 @@ int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
    char * piece;
    int i = 0;
 
+   // checking lines with bad syntax
+   int basefieldnum = 0;
+
    // split affix header line into pieces
 
    int np = 0;
-   while ((piece=mystrsep(&tp,' '))) {
+   while ((piece=mystrsep(&tp, 0))) {
       if (*piece != '\0') {
           switch(i) {
              // piece 1 - is type of affix
              case 0: { np++; break; }
           
              // piece 2 - is affix char
-             case 1: { np++; achar = *piece; break; }
-
+             case 1: { 
+                    np++;
+                    aflag = pHMgr->decode_flag(piece);
+                    if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
+                        ((at == 'P') && (dupflags[aflag] & dupPFX))) {
+                        fprintf(stderr, "error: duplicate affix flag %s in line %s\n", piece, nl);
+                        // return 1; XXX permissive mode for bad dictionaries
+                    }
+                    dupflags[aflag] += ((at == 'S') ? dupSFX : dupPFX);
+                    break; 
+                    }
              // piece 3 - is cross product indicator 
-             case 2: { np++; if (*piece == 'Y') ff = XPRODUCT; break; }
+             case 2: { np++; if (*piece == 'Y') ff = aeXPRODUCT; break; }
 
              // piece 4 - is number of affentries
              case 3: { 
                        np++;
                        numents = atoi(piece); 
+                       if (numents == 0) {
+                           char * err = pHMgr->encode_flag(aflag);
+                           fprintf(stderr, "error: affix %s header has incorrect entry count in line %s\n",
+                                   err, nl);
+                           free(err);
+                           return 1;
+                       }
                        ptr = (struct affentry *) malloc(numents * sizeof(struct affentry));
-                       ptr->xpflg = ff;
-                       ptr->achar = achar;
-                       break;
+                       if (!ptr) return 1;
+                       ptr->opts = ff;
+                       if (utf8) ptr->opts += aeUTF8;
+                       if (pHMgr->is_aliasf()) ptr->opts += aeALIASF;
+                       if (pHMgr->is_aliasm()) ptr->opts += aeALIASM;
+                       ptr->aflag = aflag;
                      }
 
 	     default: break;
@@ -1203,7 +3718,9 @@ int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
    }
    // check to make sure we parsed enough pieces
    if (np != 4) {
-       fprintf(stderr, "error: affix %c header has insufficient data in line %s\n",achar,nl);
+       char * err = pHMgr->encode_flag(aflag); 
+       fprintf(stderr, "error: affix %s header has insufficient data in line %s\n", err, nl);
+       free(err);
        free(ptr);
        return 1;
    }
@@ -1213,40 +3730,45 @@ int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
 
    // now parse numents affentries for this affix
    for (int j=0; j < numents; j++) {
-      fgets(nl,MAXLNLEN,af);
+      if (!fgets(nl,MAXLNLEN,af)) return 1;
       mychomp(nl);
       tp = nl;
       i = 0;
       np = 0;
 
       // split line into pieces
-      while ((piece=mystrsep(&tp,' '))) {
+      while ((piece=mystrsep(&tp, 0))) {
          if (*piece != '\0') {
              switch(i) {
-
                 // piece 1 - is type
                 case 0: { 
                           np++;
-                          if (nptr != ptr) nptr->xpflg = ptr->xpflg;
+                          if (nptr != ptr) nptr->opts = ptr->opts;
                           break;
                         }
 
                 // piece 2 - is affix char
                 case 1: { 
 		          np++;
-                          if (*piece != achar) {
-                              fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl);
+                          if (pHMgr->decode_flag(piece) != aflag) {
+                              char * err = pHMgr->encode_flag(aflag);
+                              fprintf(stderr, "error: affix %s is corrupt near line %s\n", err, nl);
                               fprintf(stderr, "error: possible incorrect count\n");
+                              free(err);
                               free(piece);
                               return 1;
                           }
-                          if (nptr != ptr) nptr->achar = ptr->achar;
+
+                          if (nptr != ptr) nptr->aflag = ptr->aflag;
                           break;
 		        }
 
                 // piece 3 - is string to strip or 0 for null 
                 case 2: { 
                           np++;
+                          if (complexprefixes) {
+                            if (utf8) reverseword_utf(piece); else reverseword(piece);
+                          }
                           nptr->strip = mystrdup(piece);
                           nptr->stripl = strlen(nptr->strip);
                           if (strcmp(nptr->strip,"0") == 0) {
@@ -1259,8 +3781,39 @@ int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
 
                 // piece 4 - is affix string or 0 for null
                 case 3: { 
+                          char * dash; 	
+                          nptr->morphcode = NULL;
+                          nptr->contclass = NULL;
+                          nptr->contclasslen = 0;
 		          np++;
-                          nptr->appnd = mystrdup(piece);
+                          dash = strchr(piece, '/');
+			  if (dash) {
+                            *dash = '\0';
+                            if (complexprefixes) {
+                                if (utf8) reverseword_utf(piece); else reverseword(piece);
+                            }
+                            nptr->appnd = mystrdup(piece);
+                            
+                            if (pHMgr->is_aliasf()) {
+                                int index = atoi(dash + 1);
+                                nptr->contclasslen = pHMgr->get_aliasf(index, &(nptr->contclass));
+                            } else {
+                                nptr->contclasslen = pHMgr->decode_flags(&(nptr->contclass), dash + 1);
+                                flag_qsort(nptr->contclass, 0, nptr->contclasslen);
+                            }
+                            *dash = '/';
+
+                            havecontclass = 1;
+                            for (unsigned short i = 0; i < nptr->contclasslen; i++) {
+                              contclasses[(nptr->contclass)[i]] = 1;
+                            }
+                          } else {
+                            if (complexprefixes) {
+                                if (utf8) reverseword_utf(piece); else reverseword(piece);
+                            }
+                            nptr->appnd = mystrdup(piece);       
+                          }
+			  
                           nptr->appndl = strlen(nptr->appnd);
                           if (strcmp(nptr->appnd,"0") == 0) {
                               free(nptr->appnd);
@@ -1271,7 +3824,77 @@ int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
                         }
 
                 // piece 5 - is the conditions descriptions
-                case 4: { np++; encodeit(nptr,piece); }
+                case 4: { 
+                          np++;
+                          if (complexprefixes) {
+                            int neg = 0;
+                            if (utf8) reverseword_utf(piece); else reverseword(piece);
+                            // reverse condition
+                            for (char * k = piece + strlen(piece) - 1; k >= piece; k--) {
+                                switch(*k) {
+                                  case '[': {
+                                        if (neg) *(k+1) = '['; else *k = ']';
+                                        break;
+                                    }
+                                  case ']': {
+                                        *k = '[';
+                                        if (neg) *(k+1) = '^';
+                                        neg = 0;
+                                        break;
+                                    }
+                                  case '^': {
+                                       if (*(k+1) == ']') neg = 1; else *(k+1) = *k;
+                                       break;
+                                    }
+                                  default: {
+                                    if (neg) *(k+1) = *k;
+                                  }
+                               }
+                            }
+                          }
+                          if (nptr->stripl && (strcmp(piece, ".") != 0) &&
+                            redundant_condition(at, nptr->strip, nptr->stripl, piece, nl))
+                                strcpy(piece, ".");
+                          if (encodeit(nptr,piece)) return 1;
+                         break;
+                }
+                
+                case 5: {
+		          np++;
+                          if (pHMgr->is_aliasm()) {
+                            int index = atoi(piece);
+                            nptr->morphcode = pHMgr->get_aliasm(index);
+                          } else {
+                            if (complexprefixes) {
+                                if (utf8) reverseword_utf(piece); else reverseword(piece);
+                            }
+                            nptr->morphcode = mystrdup(piece);
+                          }
+                          break; 
+                }
+
+                case 6: {
+                // XXX deprecated syntax
+                          np++;
+                          if (nptr->contclass) {
+                            fprintf(stderr, "error: affix rule contains two contclass "
+                            "(%s and %s by deprecated syntax).\n", nptr->contclass, piece);
+                          } else {
+                            if (pHMgr->is_aliasf()) {
+                                int index = atoi(piece);
+                                nptr->contclasslen = pHMgr->get_aliasf(index, &(nptr->contclass));
+                            } else {
+                                nptr->contclasslen = pHMgr->decode_flags(&(nptr->contclass), piece);
+			        flag_qsort(nptr->contclass, 0, nptr->contclasslen);
+                            }
+                            havecontclass = 1;
+                            for (unsigned short i = 0; i < nptr->contclasslen; i++) {
+                              contclasses[(nptr->contclass)[i]] = 1;
+                            }
+                          }
+                          break;
+                
+                }
 
 		default: break;
              }
@@ -1280,14 +3903,27 @@ int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
          free(piece);
       }
       // check to make sure we parsed enough pieces
-      if (np != 5) {
-          fprintf(stderr, "error: affix %c is corrupt near line %s\n",achar,nl);
+      if (np < 5) {
+          char * err = pHMgr->encode_flag(aflag);
+          fprintf(stderr, "error: affix %s is corrupt near line %s\n", err, nl);
+          free(err);
           free(ptr);
           return 1;
       }
+
+#if DEBUG
+      // detect unnecessary fields, excepting comments
+      if (basefieldnum) {
+        int fieldnum = !(nptr->morphcode) ? 5 : ((*(nptr->morphcode)=='#') ? 5 : 6);
+          if (fieldnum != basefieldnum) 
+            fprintf(stderr, "warning - bad field number:\n%s\n", nl);
+      } else {
+        basefieldnum = !(nptr->morphcode) ? 5 : ((*(nptr->morphcode)=='#') ? 5 : 6);
+      }
+#endif
       nptr++;
    }
-         
+ 
    // now create SfxEntry or PfxEntry objects and use links to
    // build an ordered (sorted by affix string) list
    nptr = ptr;
@@ -1304,3 +3940,81 @@ int  AffixMgr::parse_affix(char * line, const char at, FILE * af)
    free(ptr);
    return 0;
 }
+
+void AffixMgr::set_spec_utf8_encoding() {
+    if (utf8) {
+        // In Azeri and Turkish, I and i are distinct letters: upper-case `I'
+        // (U+0049) pairs with the dotless lower-case i (U+0131), and lower-case
+        // `i' (U+0069) pairs with the dotted upper-case I (U+0130).
+        if ((langnum == LANG_az) || (langnum == LANG_tr)) {
+            utf_tbl[0x0049].clower = 0x0131;
+            utf_tbl[0x0069].cupper = 0x0130;
+        }
+    }
+}
+
+// Return 1 when condition `cond' of an affix of type `ft' ('P' = prefix, anything else = suffix) is
+// already implied by the stripped string `strip' (stripl chars), else 0; `line' is only echoed in diagnostics.
+int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, char * line) {
+  int condl = strlen(cond);
+  int i, j, neg, in;
+  if (ft == 'P') { // prefix
+    if (strncmp(strip, cond, condl) == 0) return 1;
+    if (utf8) { // UTF-8 conditions are not analysed here
+    } else {
+      for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
+        if (cond[j] != '[') {
+          if (cond[j] != strip[i]) {
+            fprintf(stderr, "warning - incompatible stripping characters and condition:\n%s\n", line);
+            return 0; // a contradicted condition must not be reported as redundant
+          }
+        } else {
+          neg = (cond[j+1] == '^') ? 1 : 0;
+          in = 0;
+          do { // walk the [...] class, noting whether strip[i] is a member
+            j++;
+            if (strip[i] == cond[j]) in = 1;
+          } while ((j < (condl - 1)) && (cond[j] != ']'));
+          if (j == (condl - 1) && (cond[j] != ']')) {
+            fprintf(stderr, "error - missing ] in condition:\n%s\n", line);
+            return 0;
+          }
+          if ((!neg && !in) || (neg && in)) {
+            fprintf(stderr, "warning - incompatible stripping characters and condition:\n%s\n", line);
+            return 0;
+          }
+        }
+      }
+      if (j >= condl) return 1; // whole condition consumed by the stripped characters
+    }
+  } else { // suffix
+    if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) return 1;
+    if (utf8) { // UTF-8 conditions are not analysed here
+    } else {
+      for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
+        if (cond[j] != ']') {
+          if (cond[j] != strip[i]) {
+            fprintf(stderr, "warning - incompatible stripping characters and condition:\n%s\n", line);
+            return 0; // a contradicted condition must not be reported as redundant
+          }
+        } else {
+          in = 0;
+          while ((j > 0) && (cond[--j] != '[')) { // scan back to '[' without ever reading cond[-1]
+            if (strip[i] == cond[j]) in = 1;
+          }
+          if ((j == 0) && (cond[j] != '[')) {
+            fprintf(stderr, "error - missing [ in condition:\n%s\n", line);
+            return 0;
+          }
+          neg = (cond[j+1] == '^') ? 1 : 0;
+          if ((!neg && !in) || (neg && in)) {
+            fprintf(stderr, "warning - incompatible stripping characters and condition:\n%s\n", line);
+            return 0;
+          }
+        }
+      }
+      if (j < 0) return 1; // whole condition consumed by the stripped characters
+    }
+  }
+  return 0;
+}
diff --git a/src/myspell/affixmgr.hxx b/src/myspell/affixmgr.hxx
index 6cbd112..e93ba8e 100644
--- a/src/myspell/affixmgr.hxx
+++ b/src/myspell/affixmgr.hxx
@@ -1,69 +1,203 @@
 #ifndef _AFFIXMGR_HXX_
 #define _AFFIXMGR_HXX_
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
 
 #include "atypes.hxx"
 #include "baseaffix.hxx"
 #include "hashmgr.hxx"
-#include <cstdio>
+
+// check flag duplication
+#define dupSFX        (1 << 0)
+#define dupPFX        (1 << 1)
 
 class AffixMgr
 {
 
   AffEntry *          pStart[SETSIZE];
   AffEntry *          sStart[SETSIZE];
-  AffEntry *          pFlag[SETSIZE];
-  AffEntry *          sFlag[SETSIZE];
+  AffEntry *          pFlag[CONTSIZE];
+  AffEntry *          sFlag[CONTSIZE];
   HashMgr *           pHMgr;
   char *              trystring;
   char *              encoding;
-  char *              compound;
+  struct cs_info *    csconv;
+  int                 utf8;
+  struct unicode_info2 * utf_tbl;
+  int                 complexprefixes;
+  FLAG                compoundflag;  
+  FLAG                compoundbegin;
+  FLAG                compoundmiddle;
+  FLAG                compoundend;
+  FLAG                compoundroot;
+  FLAG                compoundforbidflag;
+  FLAG                compoundpermitflag;
+  int                 checkcompounddup;
+  int                 checkcompoundrep;
+  int                 checkcompoundcase;
+  int                 checkcompoundtriple;
+  FLAG                forbiddenword;
+  FLAG                nosuggest;
+  FLAG                pseudoroot;
   int                 cpdmin;
   int                 numrep;
   replentry *         reptable;
   int                 nummap;
   mapentry *          maptable;
-  bool                nosplitsugs;
-
+  int                 numbreak;
+  char **             breaktable;
+  int                 numcheckcpd;
+  replentry *         checkcpdtable;
+  int                 numdefcpd;
+  flagentry *         defcpdtable;
+  int                 maxngramsugs;
+  int                 nosplitsugs;
+  int                 sugswithdots;
+  int                 cpdwordmax;
+  int                 cpdmaxsyllable;
+  char *              cpdvowels;
+  w_char *            cpdvowels_utf16;
+  int                 cpdvowels_utf16_len;
+  char *              cpdsyllablenum;
+  const char *        pfxappnd; // BUG: not stateless
+  const char *        sfxappnd; // BUG: not stateless
+  FLAG                sfxflag;  // BUG: not stateless
+  char *              derived;  // BUG: not stateless
+  AffEntry *          sfx;      // BUG: not stateless
+  AffEntry *          pfx;      // BUG: not stateless
+  int                 checknum;
+  char *              wordchars;
+  unsigned short *    wordchars_utf16;
+  int                 wordchars_utf16_len;
+  char *              version;
+  char *              lang;
+  int                 langnum;
+  FLAG                lemma_present;
+  FLAG                circumfix;
+  FLAG                onlyincompound;
+  FLAG                keepcase;
+  int                 checksharps;
 
+  int                 havecontclass; // boolean variable
+  char                contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
+  flag                flag_mode;
+  
 public:
  
   AffixMgr(const char * affpath, HashMgr * ptr);
   ~AffixMgr();
-  struct hentry *     affix_check(const char * word, int len);
-  struct hentry *     prefix_check(const char * word, int len);
-  struct hentry *     suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx);
-  int                 expand_rootword(struct guessword * wlst, int maxn, 
-                             const char * ts, int wl, const char * ap, int al);
-  struct hentry *     compound_check(const char * word, int len, char compound_flag);
+  struct hentry *     affix_check(const char * word, int len,
+            const unsigned short needflag = (unsigned short) 0, char in_compound = IN_CPD_NOT);
+  struct hentry *     prefix_check(const char * word, int len,
+            char in_compound, const FLAG needflag = FLAG_NULL);
+  struct hentry *     prefix_check_twosfx(const char * word, int len,
+            char in_compound, const FLAG needflag = FLAG_NULL);
+  struct hentry *     suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx,
+			char ** wlst, int maxSug, int * ns, const FLAG cclass = FLAG_NULL,
+                        const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+  struct hentry *     suffix_check_twosfx(const char * word, int len,
+            int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
+
+  char * affix_check_morph(const char * word, int len,
+                    const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+  char * prefix_check_morph(const char * word, int len,
+                    char in_compound, const FLAG needflag = FLAG_NULL);
+  char * suffix_check_morph (const char * word, int len, int sfxopts, AffEntry * ppfx,
+            const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+
+  char * prefix_check_twosfx_morph(const char * word, int len,
+            char in_compound, const FLAG needflag = FLAG_NULL);
+  char * suffix_check_twosfx_morph(const char * word, int len,
+            int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL);
+
+  int                 expand_rootword(struct guessword * wlst, int maxn, const char * ts,
+                        int wl, const unsigned short * ap, unsigned short al, char * bad, int);
+
+  int                 get_syllable (const char * word, int wlen);
+  int                 cpdrep_check(const char * word, int len);
+  int                 cpdpat_check(const char * word, int len);
+  int                 defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** rwords, char all);
+  int                 cpdcase_check(const char * word, int len);
+  int                 candidate_check(const char * word, int len);
+  struct hentry *     compound_check(const char * word, int len,
+                              short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
+                              char hu_mov_rule, int * cmpdstemnum, int * cmpdstem, char is_sug);
+
+  int compound_check_morph(const char * word, int len,
+                              short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
+                              char hu_mov_rule, char ** result, char * partresult);
+
   struct hentry *     lookup(const char * word);
   int                 get_numrep();
   struct replentry *  get_reptable();
   int                 get_nummap();
   struct mapentry *   get_maptable();
+  int                 get_numbreak();
+  char **             get_breaktable();
   char *              get_encoding();
+  int                 get_langnum();
+  struct unicode_info2 * get_utf_conv();
   char *              get_try_string();
-  char *              get_compound();
-  bool                get_nosplitsugs();
-             
+  const char *        get_wordchars();
+  unsigned short * get_wordchars_utf16(int * len);
+  int                 get_compound();
+  FLAG                get_compoundflag();
+  FLAG                get_compoundbegin();
+  FLAG                get_forbiddenword();
+  FLAG                get_nosuggest();
+  FLAG                get_pseudoroot();
+  FLAG                get_onlyincompound();
+  FLAG                get_compoundroot();
+  FLAG                get_lemma_present();
+  int                 get_checknum();
+  char *              get_possible_root();
+  const char *        get_prefix();
+  const char *        get_suffix();
+  const char *        get_derived();
+  const char *        get_version();
+  const int           have_contclass();
+  int                 get_utf8();
+  int                 get_complexprefixes();
+  char *              get_suffixed(char );
+  int                 get_maxngramsugs();
+  int                 get_nosplitsugs();
+  int                 get_sugswithdots(void);
+  FLAG                get_keepcase(void);
+  int                 get_checksharps(void);
+
 private:
   int  parse_file(const char * affpath);
   int  parse_try(char * line);
   int  parse_set(char * line);
+  int  parse_flag(char * line, unsigned short * out, char * name);
+  int  parse_num(char * line, int * out, char * name);
   int  parse_cpdflag(char * line);
-  int  parse_cpdmin(char * line);
+  int  parse_cpdforbid(char * line);
+  int  parse_forbid(char * line);
+  int  parse_cpdsyllable(char * line);
+  int  parse_syllablenum(char * line);
   int  parse_reptable(char * line, FILE * af);
   int  parse_maptable(char * line, FILE * af);
-  int  parse_affix(char * line, const char at, FILE * af);
+  int  parse_breaktable(char * line, FILE * af);
+  int  parse_checkcpdtable(char * line, FILE * af);
+  int  parse_defcpdtable(char * line, FILE * af);
+  int  parse_affix(char * line, const char at, FILE * af, char * dupflags);
+  int  parse_wordchars(char * line);
+  int  parse_lang(char * line);
+  int  parse_version(char * line);
 
-  void encodeit(struct affentry * ptr, char * cs);
+  int encodeit(struct affentry * ptr, char * cs);
   int build_pfxtree(AffEntry* pfxptr);
   int build_sfxtree(AffEntry* sfxptr);
-  AffEntry* process_sfx_in_order(AffEntry* ptr, AffEntry* nptr);
-  AffEntry* process_pfx_in_order(AffEntry* ptr, AffEntry* nptr);
-  int process_pfx_tree_to_list();
-  int process_sfx_tree_to_list();
   int process_pfx_order();
   int process_sfx_order();
+  AffEntry * process_pfx_in_order(AffEntry * ptr, AffEntry * nptr);
+  AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr);
+  int process_pfx_tree_to_list();
+  int process_sfx_tree_to_list();
+  void set_spec_utf8_encoding();
+  int redundant_condition(char, char * strip, int stripl, const char * cond, char *);
 };
 
 #endif
diff --git a/src/myspell/atypes.hxx b/src/myspell/atypes.hxx
index a10c69d..c8c9257 100644
--- a/src/myspell/atypes.hxx
+++ b/src/myspell/atypes.hxx
@@ -1,34 +1,74 @@
 #ifndef _ATYPES_HXX_
 #define _ATYPES_HXX_
 
+// HUNSTEM def.
+#define HUNSTEM
+
+#include "csutil.hxx"
+#include "hashmgr.hxx"
+
 #define SETSIZE         256
-#define MAXAFFIXES      256
+#define CONTSIZE        65536
 #define MAXWORDLEN      100
-#define XPRODUCT        (1 << 0)
+#define MAXWORDUTF8LEN  (MAXWORDLEN * 4)
+
+// affentry options
+#define aeXPRODUCT      (1 << 0)
+#define aeUTF8          (1 << 1)
+#define aeALIASF        (1 << 2)
+#define aeALIASM        (1 << 3)
+
+enum {IN_CPD_NOT, IN_CPD_BEGIN, IN_CPD_END, IN_CPD_OTHER};
+
+#define MAXLNLEN        (8192 * 4)  // max line length; parenthesised so it expands safely inside larger expressions
 
-#define MAXLNLEN        1024
+#define MAXCOMPOUND	10
 
-#define TESTAFF( a , b , c ) memchr((void *)(a), (int)(b), (size_t)(c) )
+#define MAXACC          1000
+
+#define FLAG unsigned short  // integer type that holds one affix flag
+#define FLAG_NULL 0x00
+#define FREE_FLAG(a) ((a) = 0)
+
+#define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) (a), (unsigned short) (b), (c))
 
 struct affentry
 {
    char * strip;
    char * appnd;
-   short  stripl;
-   short  appndl;
-   short  numconds;
-   short  xpflg;
-   char   achar;
-   char   conds[SETSIZE];
+   unsigned char stripl;
+   unsigned char appndl;
+   char  numconds;
+   char  opts;
+   unsigned short aflag;
+   union {
+   	char   base[SETSIZE];
+	struct {
+		char ascii[SETSIZE/2];
+                char neg[8];
+                char all[8];
+                w_char * wchars[8];
+		int wlen[8];
+	} utf8;
+   } conds;
+   char *       morphcode;
+   unsigned short * contclass;
+   short        contclasslen;
 };
 
 struct replentry {
   char * pattern;
-  char * replacement;
+  char * pattern2;
 };
 
 struct mapentry {
   char * set;
+  w_char * set_utf16;
+  int len;
+};
+
+struct flagentry {
+  FLAG * def;
   int len;
 };
 
diff --git a/src/myspell/baseaffix.hxx b/src/myspell/baseaffix.hxx
index 6aa4351..da7c010 100644
--- a/src/myspell/baseaffix.hxx
+++ b/src/myspell/baseaffix.hxx
@@ -3,15 +3,29 @@
 
 class AffEntry
 {
+public:
+
 protected:
        char *       appnd;
        char *       strip;
-       short        appndl;
-       short        stripl;
-       short        numconds;
-       short        xpflg;
-       char         achar;
-       char         conds[SETSIZE];
+       unsigned char  appndl;
+       unsigned char  stripl;
+       char         numconds;
+       char  opts;
+       unsigned short aflag;
+       union {
+   	 char   base[SETSIZE];
+	 struct {
+		char  ascii[SETSIZE/2];
+                char neg[8];
+                char all[8];
+                w_char * wchars[8];
+		int wlen[8];
+	 } utf8;
+       } conds;
+       char *       morphcode;
+       unsigned short * contclass;
+       short        contclasslen;
 };
 
 #endif
diff --git a/src/myspell/csutil.cxx b/src/myspell/csutil.cxx
index 73065f1..4fe2fbf 100644
--- a/src/myspell/csutil.cxx
+++ b/src/myspell/csutil.cxx
@@ -1,178 +1,497 @@
 #include <cstdlib>
 #include <cstring>
+#include <cctype>
 #include <cstdio>
 #include "csutil.hxx"
 
-#ifndef WINDOWS
-using namespace std;
-#endif
+#include "atypes.hxx"
+#include "langnum.hxx"
 
-// strip strings into token based on single char delimiter
-// acts like strsep() but only uses a delim char and not 
-// a delim string
-
-char * mystrsep(char ** stringp, const char delim)
-{
-  char * rv = NULL;
-  char * mp = *stringp;
-  int n = strlen(mp);
-  if (n > 0) {
-     char * dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
-     if (dp) {
-        *stringp = dp+1;
-        int nc = (int)((unsigned long)dp - (unsigned long)mp); 
-        rv = (char *) malloc(nc+1);
-        memcpy(rv,mp,nc);
-        *(rv+nc) = '\0';
-        return rv;
-     } else {
-       rv = (char *) malloc(n+1);
-       memcpy(rv, mp, n);
-       *(rv+n) = '\0';
-       *stringp = mp + n;
-       return rv;
-     }
-  }
-  return NULL;
-}
+#include "utf_info.cxx"
+#define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info)))
 
+#ifndef W32
+using namespace std;
+#endif
 
-// replaces strdup with ansi version
-char * mystrdup(const char * s)
-{
-  char * d = NULL;
-  if (s) {
-     int sl = strlen(s);
-     d = (char *) malloc(((sl+1) * sizeof(char)));
-     if (d) memcpy(d,s,((sl+1)*sizeof(char)));
-  }
-  return d;
+/* Encode srclen UTF-16 (BMP only) units from src as UTF-8 into dest, writing at most size bytes plus a NUL; returns dest. */
+char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
+    char * u8 = dest;
+    char * u8_max = u8 + size;
+    const w_char * u2 = src;
+    const w_char * u2_max = src + srclen;
+    while ((u2 < u2_max) && (u8 < u8_max)) {
+        if (u2->h) { // > 0xFF
+            // XXX 4-byte sequences are not implemented yet.
+            if (u2->h >= 0x08) {   // >= 0x800 (3-byte UTF-8 character)
+                *u8 = 0xe0 + (u2->h >> 4);
+                u8++;
+                if (u8 < u8_max) { // a full buffer cuts the sequence short here
+                    *u8 = 0x80 + ((u2->h & 0xf) << 2) + (u2->l >> 6);
+                    u8++;
+                    if (u8 < u8_max) {
+                        *u8 = 0x80 + (u2->l & 0x3f);
+                        u8++;
+                    }
+                }
+            } else { // < 0x800 (2-byte UTF-8 character)
+                *u8 = 0xc0 + (u2->h << 2) + (u2->l >> 6);
+                u8++;
+                if (u8 < u8_max) {
+                    *u8 = 0x80 + (u2->l & 0x3f);
+                    u8++;
+                }
+            }
+        } else { // <= 0xFF
+            if (u2->l & 0x80) { // >= 0x80 (2-byte UTF-8 character)
+                *u8 = 0xc0 + (u2->l >> 6);
+                u8++;
+                if (u8 < u8_max) {
+                    *u8 = 0x80 + (u2->l & 0x3f);
+                    u8++;
+                }
+            } else { // < 0x80 (1-byte UTF-8 character)
+                *u8 = u2->l;
+                u8++;
+            }
+        }
+        u2++;
+    }
+    *u8 = '\0'; // NOTE(review): lands on dest[size] when the buffer filled up, so dest needs size + 1 bytes - confirm callers
+    return dest;
 }
 
 
-// remove cross-platform text line end characters
-void mychomp(char * s)
-{
-  int k = strlen(s);
-  if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
-  if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
+/* Decode UTF-8 src into at most size UTF-16 (BMP only) units in dest; returns the unit count. Bad input becomes U+FFFD. */
+int u8_u16(w_char * dest, int size, const char * src) {
+    const char * u8 = src;
+    w_char * u2 = dest;
+    w_char * u2_max = u2 + size;
+
+    while (*u8 && (u2 < u2_max)) {
+    switch ((*u8) & 0xf0) { // the lead byte's high nibble selects the sequence length
+        case 0x00:
+        case 0x10:
+        case 0x20:
+        case 0x30:
+        case 0x40:
+        case 0x50:
+        case 0x60:
+        case 0x70: {    // 1-byte (ASCII) codes
+            u2->h = 0;
+            u2->l = *u8;
+            break;
+        }
+        case 0x80:
+        case 0x90:
+        case 0xa0:
+        case 0xb0: {    // a continuation byte where a lead byte was expected
+            fprintf(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %d. character position\n%s\n", (int)(u8 - src), src);
+            u2->h = 0xff;
+            u2->l = 0xfd;
+            break;
+        }
+        case 0xc0:
+        case 0xd0: {    // 2-byte UTF-8 codes
+            if ((*(u8+1) & 0xc0) == 0x80) {
+                u2->h = (*u8 & 0x1f) >> 2;
+                u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
+                u8++;
+            } else {
+                fprintf(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", (int)(u8 - src), src);
+                u2->h = 0xff;
+                u2->l = 0xfd;
+            }
+            break;
+        }
+        case 0xe0: {    // 3-byte UTF-8 codes
+            if ((*(u8+1) & 0xc0) == 0x80) {
+                u2->h = ((*u8 & 0x0f) << 4) + ((*(u8+1) & 0x3f) >> 2);
+                u8++;
+                if ((*(u8+1) & 0xc0) == 0x80) {
+                    u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
+                    u8++;
+                } else {
+                    fprintf(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", (int)(u8 - src), src);
+                    u2->h = 0xff;
+                    u2->l = 0xfd;
+                }
+            } else {
+                fprintf(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", (int)(u8 - src), src);
+                u2->h = 0xff;
+                u2->l = 0xfd;
+            }
+            break;
+        }
+        case 0xf0: {    // 4 or more byte UTF-8 codes
+            fprintf(stderr, "This UTF-8 encoding can't convert to UTF-16:\n%s\n", src);
+            u2->h = 0xff;
+            u2->l = 0xfd;
+            break;
+        }
+    }
+    u8++;
+    u2++;
+    }
+    return (int)(u2 - dest);
 }
 
-
-//  does an ansi strdup of the reverse of a string
-char * myrevstrdup(const char * s)
-{
-    char * d = NULL;
-    if (s) {
-       int sl = strlen(s);
-       d = (char *) malloc((sl+1) * sizeof(char));
-       if (d) {
-	 const char * p = s + sl - 1;
-         char * q = d;
-         while (p >= s) *q++ = *p--;
-         *q = '\0';
+void flag_qsort(unsigned short flags[], int begin, int end) { // sorts flags[begin..end) ascending in place; end is exclusive
+    unsigned short reg; // swap temporary
+    if (end > begin) {
+        unsigned short pivot = flags[begin];
+        int l = begin + 1;
+        int r = end;
+        while(l < r) { // partition: values <= pivot stay below l, larger ones are swapped up to r and beyond
+            if (flags[l] <= pivot) {
+                l++;
+            } else {
+             r--;
+	     reg = flags[l];
+	     flags[l] = flags[r];
+	     flags[r] = reg;
+          }
        }
+       l--; // last slot holding a value <= pivot
+       reg = flags[begin];
+       flags[begin] = flags[l];
+       flags[l] = reg;
+
+       flag_qsort(flags, begin, l); // values <= pivot
+       flag_qsort(flags, r, end); // values > pivot
     }
-    return d; 
-}
+ }
 
-#if 0
-// return 1 if s1 is a leading subset of s2
-int isSubset(const char * s1, const char * s2)
-{
-  int l1 = strlen(s1);
-  int l2 = strlen(s2);
-  if (l1 > l2) return 0;
-  if (strncmp(s2,s1,l1) == 0) return 1;
-  return 0;
+int flag_bsearch(unsigned short flags[], unsigned short flag, int length) { // 1 if flag is in flags[0..length), else 0
+   int mid;
+   int left = 0;
+   int right = length - 1;
+   while (left <= right) { // binary search: only meaningful when flags is sorted ascending
+      mid = (left + right) / 2;
+      if (flags[mid] == flag) return 1;
+      if (flag < flags[mid]) right = mid - 1;
+      else left = mid + 1;
+   }
+   return 0;
 }
-#endif
 
+ // Split the next token off *stringp and return it as a malloc'd copy that
+ // the caller frees. Acts like strsep() but takes one delimiter char; a
+ // delim of 0 splits on any isspace() character. *stringp is advanced past
+ // the token; returns NULL when *stringp is empty or malloc fails.
+ 
+ char * mystrsep(char ** stringp, const char delim)
+ {
+   char * rv = NULL;
+   char * mp = *stringp;
+   int n = strlen(mp);
+   if (n > 0) {
+      char * dp;
+      if (delim) {
+        dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
+      } else {
+        for (dp = mp; (*dp && !isspace((unsigned char)*dp)); dp++); // cast: isspace() is undefined for negative chars
+        if (!*dp) dp = NULL;
+      }
+      if (dp) {
+         int nc = (int)(dp - mp);
+         if (!(rv = (char *) malloc(nc+1))) return NULL;
+         *stringp = dp+1;
+         memcpy(rv,mp,nc);
+         *(rv+nc) = '\0';
+         return rv;
+      } else {
+        if (!(rv = (char *) malloc(n+1))) return NULL;
+        memcpy(rv, mp, n);
+        *(rv+n) = '\0';
+        *stringp = mp + n;
+        return rv;
+      }
+   }
+   return NULL;
+ }
 
-// return 1 if s1 is a leading subset of s2
-int isSubset(const char * s1, const char * s2)
-{
-  while( *s1 && *s2 && (*s1 == *s2) ) {
-    s1++;
-    s2++;
-  }
-  return (*s1 == '\0');
-}
+ 
+ // strdup() replacement: returns a malloc'd copy of s, or NULL if s is NULL or malloc fails
+ char * mystrdup(const char * s)
+ {
+   char * d = NULL;
+   if (s) {
+      int sl = strlen(s);
+      d = (char *) malloc(((sl+1) * sizeof(char)));
+      if (d) memcpy(d,s,((sl+1)*sizeof(char))); // copies the terminating NUL too
+   }
+   return d;
+ }
+ 
+ 
+ // strip one trailing line terminator ("\n", "\r" or "\r\n") from s in place
+ void mychomp(char * s)
+ {
+   int k = strlen(s);
+   if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0'; else return; // no terminator: keep interior '\r'
+   if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0'; // the '\r' preceding the terminator just removed
+ }
+ 
+ 
+ // returns a malloc'd copy of s with its characters reversed, or NULL if s is NULL or malloc fails
+ char * myrevstrdup(const char * s)
+ {
+     char * d = NULL;
+     if (s) {
+        int sl = strlen(s);
+        d = (char *) malloc((sl+1) * sizeof(char));
+        if (d) {
+          const char * p = s + sl - 1; // last character of s
+          char * q = d;
+          while (p >= s) *q++ = *p--; // copy back to front
+          *q = '\0';
+        }
+     }
+     return d;
+ }
 
 
-// return 1 if s1 (reversed) is a leading subset of end of s2
-int isRevSubset(const char * s1, const char * end_of_s2, int len)
-{
-  while( (len > 0) && *s1 && (*s1 == *end_of_s2) ) {
-    s1++;
-    end_of_s2--;
-    len --;
-  }
-  return (*s1 == '\0');
-}
+ // Prefix test: yields 1 when every character of s1 matches the
+ // character at the same position of s2 (an empty s1 always matches),
+ // 0 at the first difference.
+ int isSubset(const char * s1, const char * s2)
+ {
+    for (; *s1 != '\0'; s1++, s2++) {
+        if (*s1 != *s2) return 0;
+    }
+    return 1;
+ }
 
 
-// convert null terminated string to all caps using encoding 
-void enmkallcap(char * d, const char * p, const char * encoding)
-{
-  struct cs_info * csconv = get_current_cs(encoding);
-  while (*p != '\0') { 
-    *d++ = csconv[((unsigned char) *p)].cupper;
-    p++;
-  }
-  *d = '\0';
-}
+ // Suffix test against a reversed pattern: s1 is read forwards while
+ // end_of_s2 is read backwards from the last character of the other
+ // string, for at most len characters. Yields 1 when s1 was consumed
+ // completely, 0 otherwise.
+ int isRevSubset(const char * s1, const char * end_of_s2, int len)
+ {
+    for (; len > 0; len--, s1++, end_of_s2--) {
+        if (*s1 == '\0') break;
+        if (*s1 != *end_of_s2) break;
+    }
+    return (*s1 == '\0') ? 1 : 0;
+ }
 
+ // Write to d the upper-case form of the NUL-terminated string p,
+ // translating byte by byte through the cupper column of the table
+ // that get_current_cs() returns for encoding. One byte is written per
+ // input byte, followed by a terminator.
+ void enmkallcap(char * d, const char * p, const char * encoding)
+ {
+   struct cs_info * table = get_current_cs(encoding);
+   for (; *p != '\0'; ++p, ++d) {
+     *d = table[(unsigned char) *p].cupper;
+   }
+   *d = '\0';
+ }
 
-// convert null terminated string to all little using encoding
-void enmkallsmall(char * d, const char * p, const char * encoding)
-{
-  struct cs_info * csconv = get_current_cs(encoding);
-  while (*p != '\0') { 
-    *d++ = csconv[((unsigned char) *p)].clower;
-    p++;
-  }
-  *d = '\0';
-}
 
+ // Append s to the end of every line held in dest, in place: a copy of s
+ // is inserted in front of each '\n' and one more copy follows the final
+ // character. dest grows by strlen(s) for every line, so the caller must
+ // supply a buffer large enough for the expanded text.
+ // When the temporary copy of dest cannot be allocated, dest is left
+ // unchanged instead of dereferencing a NULL pointer.
+ void strlinecat(char * dest, const char * s)
+ {
+    char * dup = mystrdup(dest);
+    if (!dup) return;
+    size_t len = strlen(s);
+    for (const char * source = dup; *source; source++) {
+        if (*source == '\n') {
+            // s without its terminator; the newline follows right after
+            memcpy(dest, s, len);
+            dest += len;
+        }
+        *dest++ = *source;
+    }
+    strcpy(dest, s);
+    free(dup);
+ }
 
-// convert null terminated string to have intial capital using encoding
-void enmkinitcap(char * d, const char * p, const char * encoding)
-{
-  struct cs_info * csconv = get_current_cs(encoding);
-  memcpy(d,p,(strlen(p)+1));
-  if (*p != '\0') *d= csconv[((unsigned char)*p)].cupper;
+// Split text into lines at '\n'.
+// On success *lines receives a calloc()ed array of linenum + 1 strings,
+// each made with mystrdup(), and the return value is linenum: the number
+// of '\n' separators found, i.e. one less than the number of entries.
+// A trailing '\n' yields a final empty string. The caller frees every
+// entry and then the array itself.
+// On any allocation failure everything allocated so far is released,
+// *lines is set to NULL and -1 is returned.
+int line_tok(const char * text, char *** lines) {
+    int linenum = 0;
+    *lines = NULL;
+    char * dup = mystrdup(text);
+    if (!dup) return -1;
+    char * p = dup;
+    // cut the working copy into NUL-terminated pieces, counting the cuts
+    while ((p = strchr(p, '\n'))) {
+        linenum++;
+        *p = '\0';
+        p++;
+        if (*p == '\0') break;
+    }
+    *lines = (char **) calloc(linenum + 1, sizeof(char *));
+    if (!(*lines)) {
+        free(dup);   // previously leaked on this path
+        return -1;
+    }
+
+    p = dup;
+    for (int i = 0; i < linenum + 1; i++) {
+        (*lines)[i] = mystrdup(p);
+        if (!(*lines)[i]) {
+            // do not hand back an array with NULL holes in it
+            for (int j = 0; j < i; j++) free((*lines)[j]);
+            free(*lines);
+            *lines = NULL;
+            free(dup);
+            return -1;
+        }
+        p += strlen(p) + 1;
+    }
+    free(dup);
+    return linenum;
 }
 
+// Remove repeated lines from text in place and return text. Lines are
+// compared as whole strings; the first occurrence of each one is kept
+// and the surviving lines stay in their original order, joined by '\n'.
+// If the text cannot be tokenised (line_tok() reports -1) it is returned
+// unmodified.
+char * line_uniq(char * text) {
+    char ** lines;
+    // int rather than char: a char counter wrapped once the text had
+    // more than 127 lines
+    int linenum = line_tok(text, &lines);
+    if (linenum < 0) return text;
+    int i;
+    strcpy(text, lines[0]);
+    for ( i = 1; i<=linenum; i++ ) {
+        int dup = 0;
+        for (int j = 0; j < i; j++) {
+            if (strcmp(lines[i], lines[j]) == 0) {
+                dup = 1;
+                break;   // one earlier match is enough
+            }
+        }
+        if (!dup) {
+            if ((i > 1) || (*(lines[0]) != '\0')) strcat(text, "\n");
+            strcat(text, lines[i]);
+        }
+    }
+    for ( i = 0; i<=linenum; i++ ) {
+        free(lines[i]);
+    }
+    free(lines);
+    return text;
+}
 
-// convert null terminated string to all caps 
-void mkallcap(char * p, const struct cs_info * csconv)
-{
-  while (*p != '\0') { 
-    *p = csconv[((unsigned char) *p)].cupper;
-    p++;
-  }
+// Turn every '\n' in text into c, in place, and return text.
+char * line_join(char * text, char c) {
+    char * cursor = text;
+    while (*cursor != '\0') {
+        if (*cursor == '\n') *cursor = c;
+        ++cursor;
+    }
+    return text;
 }
 
+// leave only last {[^}]*} substring for handling zero morphemes
+//
+// Rewrites morphout in place and returns it. p is the read position,
+// q the write position (q never runs ahead of p), q2 the write position
+// recorded at the most recent '{', and suffix is set while the last
+// thing seen was a '}' that closed a recorded group.
+// Braces themselves are never copied. Text that follows a closed group
+// is written starting at q2, i.e. on top of what that group contributed.
+// The loop stops one character before the terminator, so the final
+// character of the input is never examined or copied.
+char * delete_zeros(char * morphout) {
+    char * p = morphout;
+    char * q = p;
+    char * q2 = NULL;
+    int suffix = 0;
+    
+    for (;*p && *(p+1);) {
+        switch (*p) {
+            // group opens: remember where its text starts; the q-- below
+            // cancels the q++ at the bottom of the loop
+            case '{': 
+                q2 = q;
+                q--;
+                break;
+            // group closes: only meaningful once a '{' has been recorded;
+            // otherwise q simply advances by one without a write
+            case '}':
+                if (q2) {
+                    suffix = 1;
+                    q--;
+                }
+                break; 
+            default:
+                // ordinary text right after a closed group: rewind so it
+                // overwrites that group's text
+                if (suffix) {
+                    q = q2;
+                }
+                suffix = 0;
+                *q = *p;
+        }
+        p++;
+        q++;
+    }
+    *q = '\0';
+    return morphout;
+}
 
-// convert null terminated string to all little
-void mkallsmall(char * p, const struct cs_info * csconv)
-{
-  while (*p != '\0') { 
-    *p = csconv[((unsigned char) *p)].clower;
-    p++;
-  }
+// Replace, in place, the first occurrence of pat inside word with rep
+// and return word. When rep is longer than pat the text grows, so the
+// buffer behind word must already have room for the extra bytes. word
+// is left untouched when pat does not occur in it.
+char * mystrrep(char * word, const char * pat, const char * rep) {
+    char * hit = strstr(word, pat);
+    if (!hit) return word;
+    int replen = strlen(rep);
+    int patlen = strlen(pat);
+    if (replen != patlen) {
+        // slide the tail, terminator included, to its new position;
+        // memmove copes with the overlap in either direction
+        char * tail = hit + patlen;
+        memmove(hit + replen, tail, strlen(tail) + 1);
+    }
+    // drop the replacement into the gap, without a terminator
+    memcpy(hit, rep, replen);
+    return word;
 }
 
+ // Write to d the lower-case form of the NUL-terminated string p,
+ // translating byte by byte through the clower column of the table
+ // that get_current_cs() returns for encoding. One byte is written per
+ // input byte, followed by a terminator.
+ void enmkallsmall(char * d, const char * p, const char * encoding)
+ {
+   struct cs_info * table = get_current_cs(encoding);
+   for (; *p != '\0'; ++p, ++d) {
+     *d = table[(unsigned char) *p].clower;
+   }
+   *d = '\0';
+ }
+ // Copy the NUL-terminated string p to d and, unless p is empty,
+ // replace the first byte of the copy with its cupper entry from the
+ // table that get_current_cs() returns for encoding.
+ void enmkinitcap(char * d, const char * p, const char * encoding)
+ {
+   struct cs_info * table = get_current_cs(encoding);
+   size_t nbytes = strlen(p) + 1;   // terminator travels with the text
+   memcpy(d, p, nbytes);
+   if (*p == '\0') return;
+   *d = table[(unsigned char) *p].cupper;
+ }
+ 
+ 
+ // Upper-case the NUL-terminated string p in place: every byte is
+ // replaced by the cupper entry that csconv holds for it.
+ void mkallcap(char * p, const struct cs_info * csconv)
+ {
+   for (; *p != '\0'; ++p) {
+     const unsigned char code = (unsigned char) *p;
+     *p = csconv[code].cupper;
+   }
+ }
+ 
+ 
+ // Lower-case the NUL-terminated string p in place: every byte is
+ // replaced by the clower entry that csconv holds for it.
+ void mkallsmall(char * p, const struct cs_info * csconv)
+ {
+   for (; *p != '\0'; ++p) {
+     const unsigned char code = (unsigned char) *p;
+     *p = csconv[code].clower;
+   }
+ }
 
-// convert null terminated string to have intial capital
-void mkinitcap(char * p, const struct cs_info * csconv)
-{
-  if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
+// Lower-case the first nc elements of u in place. Each element's 16-bit
+// code is rebuilt as (h << 8) + l and used as an index into utfconv;
+// when the clower value stored there differs from the code, it is split
+// back into the element's high and low bytes.
+void mkallsmall_utf(w_char * u, int nc, struct unicode_info2 * utfconv) {
+    for (int i = 0; i < nc; i++) {
+        w_char * cell = &u[i];
+        unsigned short code = (cell->h << 8) + cell->l;
+        struct unicode_info2 * entry = &utfconv[code];
+        if (entry->clower == code) continue;   // already its own lower case
+        cell->h = (unsigned char) (entry->clower >> 8);
+        cell->l = (unsigned char) (entry->clower & 0x00FF);
+    }
 }
+ 
+ // Capitalise the NUL-terminated string p in place: unless p is empty,
+ // its first byte is replaced by the cupper entry csconv holds for it.
+ void mkinitcap(char * p, const struct cs_info * csconv)
+ {
+   if (*p == '\0') return;
+   const unsigned char first = (unsigned char) *p;
+   *p = csconv[first].cupper;
+ }
 
+ // Reverse the bytes of the NUL-terminated string word in place.
+ // Strings shorter than two bytes are returned untouched, which also
+ // avoids computing word - 1 for an empty string.
+ void reverseword(char * word) {
+   size_t len = strlen(word);
+   if (len < 2) return;
+   // swap the outermost pair and walk both ends towards the middle
+   for (char * dest = word + len - 1; word < dest; word++, dest--) {
+     char r = *word;
+     *word = *dest;
+     *dest = r;
+   }
+ }
 
-
+ // Reverse word in place character by character rather than byte by
+ // byte: the text is converted with u8_u16() into a local array of
+ // w_char, that array is reversed, and the result is written back
+ // over word with u16_u8().
+ void reverseword_utf(char * word) {
+   w_char buf[MAXWORDLEN];
+   int n = u8_u16(buf, MAXWORDLEN, word);
+   w_char * lo = buf;
+   w_char * hi = buf + n - 1;
+   while (lo < hi) {
+     w_char tmp = *lo;
+     *lo++ = *hi;
+     *hi-- = tmp;
+   }
+   u16_u8(word, MAXWORDUTF8LEN, buf, n);
+ }
 
 // these are simple character mappings for the 
 // encodings supported
@@ -3029,7 +3348,7 @@ struct cs_info koi8r_tbl[] = {
 { 0x01, 0xdf, 0xff },
 };
 
-struct cs_info cp1251_tbl[] = {
+struct cs_info koi8u_tbl[] = {
 { 0x00, 0x00, 0x00 },
 { 0x00, 0x01, 0x01 },
 { 0x00, 0x02, 0x02 },
@@ -3193,27 +3512,27 @@ struct cs_info cp1251_tbl[] = {
 { 0x00, 0xa0, 0xa0 },
 { 0x00, 0xa1, 0xa1 },
 { 0x00, 0xa2, 0xa2 },
-{ 0x00, 0xa3, 0xa3 },
-{ 0x00, 0xa4, 0xa4 },
+{ 0x00, 0xa3, 0xb3 },
+{ 0x00, 0xa4, 0xb4 }, /* ie */
 { 0x00, 0xa5, 0xa5 },
-{ 0x00, 0xa6, 0xa6 },
-{ 0x00, 0xa7, 0xa7 },
+{ 0x00, 0xa6, 0xb6 }, /* i */
+{ 0x00, 0xa7, 0xb7 }, /* ii */
 { 0x00, 0xa8, 0xa8 },
 { 0x00, 0xa9, 0xa9 },
 { 0x00, 0xaa, 0xaa },
 { 0x00, 0xab, 0xab },
 { 0x00, 0xac, 0xac },
-{ 0x00, 0xad, 0xad },
+{ 0x00, 0xad, 0xbd }, /* g'' */
 { 0x00, 0xae, 0xae },
 { 0x00, 0xaf, 0xaf },
 { 0x00, 0xb0, 0xb0 },
 { 0x00, 0xb1, 0xb1 },
 { 0x00, 0xb2, 0xb2 },
-{ 0x00, 0xb3, 0xb3 },
-{ 0x00, 0xb4, 0xb4 },
+{ 0x01, 0xa3, 0xb3 },
+{ 0x00, 0xb4, 0xb4 }, /* IE */
 { 0x00, 0xb5, 0xb5 },
-{ 0x00, 0xb6, 0xb6 },
-{ 0x00, 0xb7, 0xb7 },
+{ 0x00, 0xb6, 0xb6 }, /* I */
+{ 0x00, 0xb7, 0xb7 }, /* II */
 { 0x00, 0xb8, 0xb8 },
 { 0x00, 0xb9, 0xb9 },
 { 0x00, 0xba, 0xba },
@@ -3222,72 +3541,591 @@ struct cs_info cp1251_tbl[] = {
 { 0x00, 0xbd, 0xbd },
 { 0x00, 0xbe, 0xbe },
 { 0x00, 0xbf, 0xbf },
-{ 0x00, 0xc0, 0xc0 },
-{ 0x00, 0xc1, 0xc1 },
-{ 0x00, 0xc2, 0xc2 },
-{ 0x00, 0xc3, 0xc3 },
-{ 0x00, 0xc4, 0xc4 },
-{ 0x00, 0xc5, 0xc5 },
-{ 0x00, 0xc6, 0xc6 },
-{ 0x00, 0xc7, 0xc7 },
-{ 0x00, 0xc8, 0xc8 },
-{ 0x00, 0xc9, 0xc9 },
-{ 0x00, 0xca, 0xca },
-{ 0x00, 0xcb, 0xcb },
-{ 0x00, 0xcc, 0xcc },
-{ 0x00, 0xcd, 0xcd },
-{ 0x00, 0xce, 0xce },
-{ 0x00, 0xcf, 0xcf },
-{ 0x00, 0xd0, 0xd0 },
-{ 0x00, 0xd1, 0xd1 },
-{ 0x00, 0xd2, 0xd2 },
-{ 0x00, 0xd3, 0xd3 },
-{ 0x00, 0xd4, 0xd4 },
-{ 0x00, 0xd5, 0xd5 },
-{ 0x00, 0xd6, 0xd6 },
-{ 0x00, 0xd7, 0xd7 },
-{ 0x00, 0xd8, 0xd8 },
-{ 0x00, 0xd9, 0xd9 },
-{ 0x00, 0xda, 0xda },
-{ 0x00, 0xdb, 0xdb },
-{ 0x00, 0xdc, 0xdc },
-{ 0x00, 0xdd, 0xdd },
-{ 0x00, 0xde, 0xde },
-{ 0x00, 0xdf, 0xdf },
-{ 0x00, 0xe0, 0xe0 },
-{ 0x00, 0xe1, 0xe1 },
-{ 0x00, 0xe2, 0xe2 },
-{ 0x00, 0xe3, 0xe3 },
-{ 0x00, 0xe4, 0xe4 },
-{ 0x00, 0xe5, 0xe5 },
-{ 0x00, 0xe6, 0xe6 },
-{ 0x00, 0xe7, 0xe7 },
-{ 0x00, 0xe8, 0xe8 },
-{ 0x00, 0xe9, 0xe9 },
-{ 0x00, 0xea, 0xea },
-{ 0x00, 0xeb, 0xeb },
-{ 0x00, 0xec, 0xec },
-{ 0x00, 0xed, 0xed },
-{ 0x00, 0xee, 0xee },
-{ 0x00, 0xef, 0xef },
-{ 0x00, 0xf0, 0xf0 },
-{ 0x00, 0xf1, 0xf1 },
-{ 0x00, 0xf2, 0xf2 },
-{ 0x00, 0xf3, 0xf3 },
-{ 0x00, 0xf4, 0xf4 },
-{ 0x00, 0xf5, 0xf5 },
-{ 0x00, 0xf6, 0xf6 },
-{ 0x00, 0xf7, 0xf7 },
-{ 0x00, 0xf8, 0xf8 },
-{ 0x00, 0xf9, 0xf9 },
-{ 0x00, 0xfa, 0xfa },
-{ 0x00, 0xfb, 0xfb },
-{ 0x00, 0xfc, 0xfc },
-{ 0x00, 0xfd, 0xfd },
-{ 0x00, 0xfe, 0xfe },
-{ 0x00, 0xff, 0xff },
+{ 0x00, 0xc0, 0xe0 },
+{ 0x00, 0xc1, 0xe1 },
+{ 0x00, 0xc2, 0xe2 },
+{ 0x00, 0xc3, 0xe3 },
+{ 0x00, 0xc4, 0xe4 },
+{ 0x00, 0xc5, 0xe5 },
+{ 0x00, 0xc6, 0xe6 },
+{ 0x00, 0xc7, 0xe7 },
+{ 0x00, 0xc8, 0xe8 },
+{ 0x00, 0xc9, 0xe9 },
+{ 0x00, 0xca, 0xea },
+{ 0x00, 0xcb, 0xeb },
+{ 0x00, 0xcc, 0xec },
+{ 0x00, 0xcd, 0xed },
+{ 0x00, 0xce, 0xee },
+{ 0x00, 0xcf, 0xef },
+{ 0x00, 0xd0, 0xf0 },
+{ 0x00, 0xd1, 0xf1 },
+{ 0x00, 0xd2, 0xf2 },
+{ 0x00, 0xd3, 0xf3 },
+{ 0x00, 0xd4, 0xf4 },
+{ 0x00, 0xd5, 0xf5 },
+{ 0x00, 0xd6, 0xf6 },
+{ 0x00, 0xd7, 0xf7 },
+{ 0x00, 0xd8, 0xf8 },
+{ 0x00, 0xd9, 0xf9 },
+{ 0x00, 0xda, 0xfa },
+{ 0x00, 0xdb, 0xfb },
+{ 0x00, 0xdc, 0xfc },
+{ 0x00, 0xdd, 0xfd },
+{ 0x00, 0xde, 0xfe },
+{ 0x00, 0xdf, 0xff },
+{ 0x01, 0xc0, 0xe0 },
+{ 0x01, 0xc1, 0xe1 },
+{ 0x01, 0xc2, 0xe2 },
+{ 0x01, 0xc3, 0xe3 },
+{ 0x01, 0xc4, 0xe4 },
+{ 0x01, 0xc5, 0xe5 },
+{ 0x01, 0xc6, 0xe6 },
+{ 0x01, 0xc7, 0xe7 },
+{ 0x01, 0xc8, 0xe8 },
+{ 0x01, 0xc9, 0xe9 },
+{ 0x01, 0xca, 0xea },
+{ 0x01, 0xcb, 0xeb },
+{ 0x01, 0xcc, 0xec },
+{ 0x01, 0xcd, 0xed },
+{ 0x01, 0xce, 0xee },
+{ 0x01, 0xcf, 0xef },
+{ 0x01, 0xd0, 0xf0 },
+{ 0x01, 0xd1, 0xf1 },
+{ 0x01, 0xd2, 0xf2 },
+{ 0x01, 0xd3, 0xf3 },
+{ 0x01, 0xd4, 0xf4 },
+{ 0x01, 0xd5, 0xf5 },
+{ 0x01, 0xd6, 0xf6 },
+{ 0x01, 0xd7, 0xf7 },
+{ 0x01, 0xd8, 0xf8 },
+{ 0x01, 0xd9, 0xf9 },
+{ 0x01, 0xda, 0xfa },
+{ 0x01, 0xdb, 0xfb },
+{ 0x01, 0xdc, 0xfc },
+{ 0x01, 0xdd, 0xfd },
+{ 0x01, 0xde, 0xfe },
+{ 0x01, 0xdf, 0xff },
+};
+
+struct cs_info cp1251_tbl[] = {
+{ 0x00, 0x00, 0x00 },
+{ 0x00, 0x01, 0x01 },
+{ 0x00, 0x02, 0x02 },
+{ 0x00, 0x03, 0x03 },
+{ 0x00, 0x04, 0x04 },
+{ 0x00, 0x05, 0x05 },
+{ 0x00, 0x06, 0x06 },
+{ 0x00, 0x07, 0x07 },
+{ 0x00, 0x08, 0x08 },
+{ 0x00, 0x09, 0x09 },
+{ 0x00, 0x0a, 0x0a },
+{ 0x00, 0x0b, 0x0b },
+{ 0x00, 0x0c, 0x0c },
+{ 0x00, 0x0d, 0x0d },
+{ 0x00, 0x0e, 0x0e },
+{ 0x00, 0x0f, 0x0f },
+{ 0x00, 0x10, 0x10 },
+{ 0x00, 0x11, 0x11 },
+{ 0x00, 0x12, 0x12 },
+{ 0x00, 0x13, 0x13 },
+{ 0x00, 0x14, 0x14 },
+{ 0x00, 0x15, 0x15 },
+{ 0x00, 0x16, 0x16 },
+{ 0x00, 0x17, 0x17 },
+{ 0x00, 0x18, 0x18 },
+{ 0x00, 0x19, 0x19 },
+{ 0x00, 0x1a, 0x1a },
+{ 0x00, 0x1b, 0x1b },
+{ 0x00, 0x1c, 0x1c },
+{ 0x00, 0x1d, 0x1d },
+{ 0x00, 0x1e, 0x1e },
+{ 0x00, 0x1f, 0x1f },
+{ 0x00, 0x20, 0x20 },
+{ 0x00, 0x21, 0x21 },
+{ 0x00, 0x22, 0x22 },
+{ 0x00, 0x23, 0x23 },
+{ 0x00, 0x24, 0x24 },
+{ 0x00, 0x25, 0x25 },
+{ 0x00, 0x26, 0x26 },
+{ 0x00, 0x27, 0x27 },
+{ 0x00, 0x28, 0x28 },
+{ 0x00, 0x29, 0x29 },
+{ 0x00, 0x2a, 0x2a },
+{ 0x00, 0x2b, 0x2b },
+{ 0x00, 0x2c, 0x2c },
+{ 0x00, 0x2d, 0x2d },
+{ 0x00, 0x2e, 0x2e },
+{ 0x00, 0x2f, 0x2f },
+{ 0x00, 0x30, 0x30 },
+{ 0x00, 0x31, 0x31 },
+{ 0x00, 0x32, 0x32 },
+{ 0x00, 0x33, 0x33 },
+{ 0x00, 0x34, 0x34 },
+{ 0x00, 0x35, 0x35 },
+{ 0x00, 0x36, 0x36 },
+{ 0x00, 0x37, 0x37 },
+{ 0x00, 0x38, 0x38 },
+{ 0x00, 0x39, 0x39 },
+{ 0x00, 0x3a, 0x3a },
+{ 0x00, 0x3b, 0x3b },
+{ 0x00, 0x3c, 0x3c },
+{ 0x00, 0x3d, 0x3d },
+{ 0x00, 0x3e, 0x3e },
+{ 0x00, 0x3f, 0x3f },
+{ 0x00, 0x40, 0x40 },
+{ 0x01, 0x61, 0x41 },
+{ 0x01, 0x62, 0x42 },
+{ 0x01, 0x63, 0x43 },
+{ 0x01, 0x64, 0x44 },
+{ 0x01, 0x65, 0x45 },
+{ 0x01, 0x66, 0x46 },
+{ 0x01, 0x67, 0x47 },
+{ 0x01, 0x68, 0x48 },
+{ 0x01, 0x69, 0x49 },
+{ 0x01, 0x6a, 0x4a },
+{ 0x01, 0x6b, 0x4b },
+{ 0x01, 0x6c, 0x4c },
+{ 0x01, 0x6d, 0x4d },
+{ 0x01, 0x6e, 0x4e },
+{ 0x01, 0x6f, 0x4f },
+{ 0x01, 0x70, 0x50 },
+{ 0x01, 0x71, 0x51 },
+{ 0x01, 0x72, 0x52 },
+{ 0x01, 0x73, 0x53 },
+{ 0x01, 0x74, 0x54 },
+{ 0x01, 0x75, 0x55 },
+{ 0x01, 0x76, 0x56 },
+{ 0x01, 0x77, 0x57 },
+{ 0x01, 0x78, 0x58 },
+{ 0x01, 0x79, 0x59 },
+{ 0x01, 0x7a, 0x5a },
+{ 0x00, 0x5b, 0x5b },
+{ 0x00, 0x5c, 0x5c },
+{ 0x00, 0x5d, 0x5d },
+{ 0x00, 0x5e, 0x5e },
+{ 0x00, 0x5f, 0x5f },
+{ 0x00, 0x60, 0x60 },
+{ 0x00, 0x61, 0x41 },
+{ 0x00, 0x62, 0x42 },
+{ 0x00, 0x63, 0x43 },
+{ 0x00, 0x64, 0x44 },
+{ 0x00, 0x65, 0x45 },
+{ 0x00, 0x66, 0x46 },
+{ 0x00, 0x67, 0x47 },
+{ 0x00, 0x68, 0x48 },
+{ 0x00, 0x69, 0x49 },
+{ 0x00, 0x6a, 0x4a },
+{ 0x00, 0x6b, 0x4b },
+{ 0x00, 0x6c, 0x4c },
+{ 0x00, 0x6d, 0x4d },
+{ 0x00, 0x6e, 0x4e },
+{ 0x00, 0x6f, 0x4f },
+{ 0x00, 0x70, 0x50 },
+{ 0x00, 0x71, 0x51 },
+{ 0x00, 0x72, 0x52 },
+{ 0x00, 0x73, 0x53 },
+{ 0x00, 0x74, 0x54 },
+{ 0x00, 0x75, 0x55 },
+{ 0x00, 0x76, 0x56 },
+{ 0x00, 0x77, 0x57 },
+{ 0x00, 0x78, 0x58 },
+{ 0x00, 0x79, 0x59 },
+{ 0x00, 0x7a, 0x5a },
+{ 0x00, 0x7b, 0x7b },
+{ 0x00, 0x7c, 0x7c },
+{ 0x00, 0x7d, 0x7d },
+{ 0x00, 0x7e, 0x7e },
+{ 0x00, 0x7f, 0x7f },
+{ 0x01, 0x90, 0x80 },
+{ 0x01, 0x83, 0x81 },
+{ 0x00, 0x82, 0x82 },
+{ 0x00, 0x83, 0x81 },
+{ 0x00, 0x84, 0x84 },
+{ 0x00, 0x85, 0x85 },
+{ 0x00, 0x86, 0x86 },
+{ 0x00, 0x87, 0x87 },
+{ 0x00, 0x88, 0x88 },
+{ 0x00, 0x89, 0x89 },
+{ 0x01, 0x9a, 0x8a },
+{ 0x00, 0x8b, 0x8b },
+{ 0x01, 0x9c, 0x8c },
+{ 0x01, 0x9d, 0x8d },
+{ 0x01, 0x9e, 0x8e },
+{ 0x01, 0x9f, 0x8f },
+{ 0x00, 0x90, 0x80 },
+{ 0x00, 0x91, 0x91 },
+{ 0x00, 0x92, 0x92 },
+{ 0x00, 0x93, 0x93 },
+{ 0x00, 0x94, 0x94 },
+{ 0x00, 0x95, 0x95 },
+{ 0x00, 0x96, 0x96 },
+{ 0x00, 0x97, 0x97 },
+{ 0x00, 0x98, 0x98 },
+{ 0x00, 0x99, 0x99 },
+{ 0x00, 0x9a, 0x8a },
+{ 0x00, 0x9b, 0x9b },
+{ 0x00, 0x9c, 0x8c },
+{ 0x00, 0x9d, 0x8d },
+{ 0x00, 0x9e, 0x8e },
+{ 0x00, 0x9f, 0x8f },
+{ 0x00, 0xa0, 0xa0 },
+{ 0x01, 0xa2, 0xa1 },
+{ 0x00, 0xa2, 0xa1 },
+{ 0x01, 0xbc, 0xa3 },
+{ 0x00, 0xa4, 0xa4 },
+{ 0x01, 0xb4, 0xa5 },
+{ 0x00, 0xa6, 0xa6 },
+{ 0x00, 0xa7, 0xa7 },
+{ 0x01, 0xb8, 0xa8 },
+{ 0x00, 0xa9, 0xa9 },
+{ 0x01, 0xba, 0xaa },
+{ 0x00, 0xab, 0xab },
+{ 0x00, 0xac, 0xac },
+{ 0x00, 0xad, 0xad },
+{ 0x00, 0xae, 0xae },
+{ 0x01, 0xbf, 0xaf },
+{ 0x00, 0xb0, 0xb0 },
+{ 0x00, 0xb1, 0xb1 },
+{ 0x01, 0xb3, 0xb2 },
+{ 0x00, 0xb3, 0xb2 },
+{ 0x00, 0xb4, 0xa5 },
+{ 0x00, 0xb5, 0xb5 },
+{ 0x00, 0xb6, 0xb6 },
+{ 0x00, 0xb7, 0xb7 },
+{ 0x00, 0xb8, 0xa8 },
+{ 0x00, 0xb9, 0xb9 },
+{ 0x00, 0xba, 0xaa },
+{ 0x00, 0xbb, 0xbb },
+{ 0x00, 0xbc, 0xa3 },
+{ 0x01, 0xbe, 0xbd },
+{ 0x00, 0xbe, 0xbd },
+{ 0x00, 0xbf, 0xaf },
+{ 0x01, 0xe0, 0xc0 },
+{ 0x01, 0xe1, 0xc1 },
+{ 0x01, 0xe2, 0xc2 },
+{ 0x01, 0xe3, 0xc3 },
+{ 0x01, 0xe4, 0xc4 },
+{ 0x01, 0xe5, 0xc5 },
+{ 0x01, 0xe6, 0xc6 },
+{ 0x01, 0xe7, 0xc7 },
+{ 0x01, 0xe8, 0xc8 },
+{ 0x01, 0xe9, 0xc9 },
+{ 0x01, 0xea, 0xca },
+{ 0x01, 0xeb, 0xcb },
+{ 0x01, 0xec, 0xcc },
+{ 0x01, 0xed, 0xcd },
+{ 0x01, 0xee, 0xce },
+{ 0x01, 0xef, 0xcf },
+{ 0x01, 0xf0, 0xd0 },
+{ 0x01, 0xf1, 0xd1 },
+{ 0x01, 0xf2, 0xd2 },
+{ 0x01, 0xf3, 0xd3 },
+{ 0x01, 0xf4, 0xd4 },
+{ 0x01, 0xf5, 0xd5 },
+{ 0x01, 0xf6, 0xd6 },
+{ 0x01, 0xf7, 0xd7 },
+{ 0x01, 0xf8, 0xd8 },
+{ 0x01, 0xf9, 0xd9 },
+{ 0x01, 0xfa, 0xda },
+{ 0x01, 0xfb, 0xdb },
+{ 0x01, 0xfc, 0xdc },
+{ 0x01, 0xfd, 0xdd },
+{ 0x01, 0xfe, 0xde },
+{ 0x01, 0xff, 0xdf },
+{ 0x00, 0xe0, 0xc0 },
+{ 0x00, 0xe1, 0xc1 },
+{ 0x00, 0xe2, 0xc2 },
+{ 0x00, 0xe3, 0xc3 },
+{ 0x00, 0xe4, 0xc4 },
+{ 0x00, 0xe5, 0xc5 },
+{ 0x00, 0xe6, 0xc6 },
+{ 0x00, 0xe7, 0xc7 },
+{ 0x00, 0xe8, 0xc8 },
+{ 0x00, 0xe9, 0xc9 },
+{ 0x00, 0xea, 0xca },
+{ 0x00, 0xeb, 0xcb },
+{ 0x00, 0xec, 0xcc },
+{ 0x00, 0xed, 0xcd },
+{ 0x00, 0xee, 0xce },
+{ 0x00, 0xef, 0xcf },
+{ 0x00, 0xf0, 0xd0 },
+{ 0x00, 0xf1, 0xd1 },
+{ 0x00, 0xf2, 0xd2 },
+{ 0x00, 0xf3, 0xd3 },
+{ 0x00, 0xf4, 0xd4 },
+{ 0x00, 0xf5, 0xd5 },
+{ 0x00, 0xf6, 0xd6 },
+{ 0x00, 0xf7, 0xd7 },
+{ 0x00, 0xf8, 0xd8 },
+{ 0x00, 0xf9, 0xd9 },
+{ 0x00, 0xfa, 0xda },
+{ 0x00, 0xfb, 0xdb },
+{ 0x00, 0xfc, 0xdc },
+{ 0x00, 0xfd, 0xdd },
+{ 0x00, 0xfe, 0xde },
+{ 0x00, 0xff, 0xdf },
 };
 
+struct cs_info iso13_tbl[] = {
+{ 0x00, 0x00, 0x00 },
+{ 0x00, 0x01, 0x01 },
+{ 0x00, 0x02, 0x02 },
+{ 0x00, 0x03, 0x03 },
+{ 0x00, 0x04, 0x04 },
+{ 0x00, 0x05, 0x05 },
+{ 0x00, 0x06, 0x06 },
+{ 0x00, 0x07, 0x07 },
+{ 0x00, 0x08, 0x08 },
+{ 0x00, 0x09, 0x09 },
+{ 0x00, 0x0A, 0x0A },
+{ 0x00, 0x0B, 0x0B },
+{ 0x00, 0x0C, 0x0C },
+{ 0x00, 0x0D, 0x0D },
+{ 0x00, 0x0E, 0x0E },
+{ 0x00, 0x0F, 0x0F },
+{ 0x00, 0x10, 0x10 },
+{ 0x00, 0x11, 0x11 },
+{ 0x00, 0x12, 0x12 },
+{ 0x00, 0x13, 0x13 },
+{ 0x00, 0x14, 0x14 },
+{ 0x00, 0x15, 0x15 },
+{ 0x00, 0x16, 0x16 },
+{ 0x00, 0x17, 0x17 },
+{ 0x00, 0x18, 0x18 },
+{ 0x00, 0x19, 0x19 },
+{ 0x00, 0x1A, 0x1A },
+{ 0x00, 0x1B, 0x1B },
+{ 0x00, 0x1C, 0x1C },
+{ 0x00, 0x1D, 0x1D },
+{ 0x00, 0x1E, 0x1E },
+{ 0x00, 0x1F, 0x1F },
+{ 0x00, 0x20, 0x20 },
+{ 0x00, 0x21, 0x21 },
+{ 0x00, 0x22, 0x22 },
+{ 0x00, 0x23, 0x23 },
+{ 0x00, 0x24, 0x24 },
+{ 0x00, 0x25, 0x25 },
+{ 0x00, 0x26, 0x26 },
+{ 0x00, 0x27, 0x27 },
+{ 0x00, 0x28, 0x28 },
+{ 0x00, 0x29, 0x29 },
+{ 0x00, 0x2A, 0x2A },
+{ 0x00, 0x2B, 0x2B },
+{ 0x00, 0x2C, 0x2C },
+{ 0x00, 0x2D, 0x2D },
+{ 0x00, 0x2E, 0x2E },
+{ 0x00, 0x2F, 0x2F },
+{ 0x00, 0x30, 0x30 },
+{ 0x00, 0x31, 0x31 },
+{ 0x00, 0x32, 0x32 },
+{ 0x00, 0x33, 0x33 },
+{ 0x00, 0x34, 0x34 },
+{ 0x00, 0x35, 0x35 },
+{ 0x00, 0x36, 0x36 },
+{ 0x00, 0x37, 0x37 },
+{ 0x00, 0x38, 0x38 },
+{ 0x00, 0x39, 0x39 },
+{ 0x00, 0x3A, 0x3A },
+{ 0x00, 0x3B, 0x3B },
+{ 0x00, 0x3C, 0x3C },
+{ 0x00, 0x3D, 0x3D },
+{ 0x00, 0x3E, 0x3E },
+{ 0x00, 0x3F, 0x3F },
+{ 0x00, 0x40, 0x40 },
+{ 0x01, 0x61, 0x41 },
+{ 0x01, 0x62, 0x42 },
+{ 0x01, 0x63, 0x43 },
+{ 0x01, 0x64, 0x44 },
+{ 0x01, 0x65, 0x45 },
+{ 0x01, 0x66, 0x46 },
+{ 0x01, 0x67, 0x47 },
+{ 0x01, 0x68, 0x48 },
+{ 0x01, 0x69, 0x49 },
+{ 0x01, 0x6A, 0x4A },
+{ 0x01, 0x6B, 0x4B },
+{ 0x01, 0x6C, 0x4C },
+{ 0x01, 0x6D, 0x4D },
+{ 0x01, 0x6E, 0x4E },
+{ 0x01, 0x6F, 0x4F },
+{ 0x01, 0x70, 0x50 },
+{ 0x01, 0x71, 0x51 },
+{ 0x01, 0x72, 0x52 },
+{ 0x01, 0x73, 0x53 },
+{ 0x01, 0x74, 0x54 },
+{ 0x01, 0x75, 0x55 },
+{ 0x01, 0x76, 0x56 },
+{ 0x01, 0x77, 0x57 },
+{ 0x01, 0x78, 0x58 },
+{ 0x01, 0x79, 0x59 },
+{ 0x01, 0x7A, 0x5A },
+{ 0x00, 0x5B, 0x5B },
+{ 0x00, 0x5C, 0x5C },
+{ 0x00, 0x5D, 0x5D },
+{ 0x00, 0x5E, 0x5E },
+{ 0x00, 0x5F, 0x5F },
+{ 0x00, 0x60, 0x60 },
+{ 0x00, 0x61, 0x41 },
+{ 0x00, 0x62, 0x42 },
+{ 0x00, 0x63, 0x43 },
+{ 0x00, 0x64, 0x44 },
+{ 0x00, 0x65, 0x45 },
+{ 0x00, 0x66, 0x46 },
+{ 0x00, 0x67, 0x47 },
+{ 0x00, 0x68, 0x48 },
+{ 0x00, 0x69, 0x49 },
+{ 0x00, 0x6A, 0x4A },
+{ 0x00, 0x6B, 0x4B },
+{ 0x00, 0x6C, 0x4C },
+{ 0x00, 0x6D, 0x4D },
+{ 0x00, 0x6E, 0x4E },
+{ 0x00, 0x6F, 0x4F },
+{ 0x00, 0x70, 0x50 },
+{ 0x00, 0x71, 0x51 },
+{ 0x00, 0x72, 0x52 },
+{ 0x00, 0x73, 0x53 },
+{ 0x00, 0x74, 0x54 },
+{ 0x00, 0x75, 0x55 },
+{ 0x00, 0x76, 0x56 },
+{ 0x00, 0x77, 0x57 },
+{ 0x00, 0x78, 0x58 },
+{ 0x00, 0x79, 0x59 },
+{ 0x00, 0x7A, 0x5A },
+{ 0x00, 0x7B, 0x7B },
+{ 0x00, 0x7C, 0x7C },
+{ 0x00, 0x7D, 0x7D },
+{ 0x00, 0x7E, 0x7E },
+{ 0x00, 0x7F, 0x7F },
+{ 0x00, 0x80, 0x80 },
+{ 0x00, 0x81, 0x81 },
+{ 0x00, 0x82, 0x82 },
+{ 0x00, 0x83, 0x83 },
+{ 0x00, 0x84, 0x84 },
+{ 0x00, 0x85, 0x85 },
+{ 0x00, 0x86, 0x86 },
+{ 0x00, 0x87, 0x87 },
+{ 0x00, 0x88, 0x88 },
+{ 0x00, 0x89, 0x89 },
+{ 0x00, 0x8A, 0x8A },
+{ 0x00, 0x8B, 0x8B },
+{ 0x00, 0x8C, 0x8C },
+{ 0x00, 0x8D, 0x8D },
+{ 0x00, 0x8E, 0x8E },
+{ 0x00, 0x8F, 0x8F },
+{ 0x00, 0x90, 0x90 },
+{ 0x00, 0x91, 0x91 },
+{ 0x00, 0x92, 0x92 },
+{ 0x00, 0x93, 0x93 },
+{ 0x00, 0x94, 0x94 },
+{ 0x00, 0x95, 0x95 },
+{ 0x00, 0x96, 0x96 },
+{ 0x00, 0x97, 0x97 },
+{ 0x00, 0x98, 0x98 },
+{ 0x00, 0x99, 0x99 },
+{ 0x00, 0x9A, 0x9A },
+{ 0x00, 0x9B, 0x9B },
+{ 0x00, 0x9C, 0x9C },
+{ 0x00, 0x9D, 0x9D },
+{ 0x00, 0x9E, 0x9E },
+{ 0x00, 0x9F, 0x9F },
+{ 0x00, 0xA0, 0xA0 },
+{ 0x00, 0xA1, 0xA1 },
+{ 0x00, 0xA2, 0xA2 },
+{ 0x00, 0xA3, 0xA3 },
+{ 0x00, 0xA4, 0xA4 },
+{ 0x00, 0xA5, 0xA5 },
+{ 0x00, 0xA6, 0xA6 },
+{ 0x00, 0xA7, 0xA7 },
+{ 0x01, 0xB8, 0xA8 },
+{ 0x00, 0xA9, 0xA9 },
+{ 0x01, 0xBA, 0xAA },
+{ 0x00, 0xAB, 0xAB },
+{ 0x00, 0xAC, 0xAC },
+{ 0x00, 0xAD, 0xAD },
+{ 0x00, 0xAE, 0xAE },
+{ 0x01, 0xBF, 0xAF },
+{ 0x00, 0xB0, 0xB0 },
+{ 0x00, 0xB1, 0xB1 },
+{ 0x00, 0xB2, 0xB2 },
+{ 0x00, 0xB3, 0xB3 },
+{ 0x00, 0xB4, 0xB4 },
+{ 0x00, 0xB5, 0xB5 },
+{ 0x00, 0xB6, 0xB6 },
+{ 0x00, 0xB7, 0xB7 },
+{ 0x00, 0xB8, 0xA8 },
+{ 0x00, 0xB9, 0xB9 },
+{ 0x00, 0xBA, 0xAA },
+{ 0x00, 0xBB, 0xBB },
+{ 0x00, 0xBC, 0xBC },
+{ 0x00, 0xBD, 0xBD },
+{ 0x00, 0xBE, 0xBE },
+{ 0x00, 0xBF, 0xAF },
+{ 0x01, 0xE0, 0xC0 },
+{ 0x01, 0xE1, 0xC1 },
+{ 0x01, 0xE2, 0xC2 },
+{ 0x01, 0xE3, 0xC3 },
+{ 0x01, 0xE4, 0xC4 },
+{ 0x01, 0xE5, 0xC5 },
+{ 0x01, 0xE6, 0xC6 },
+{ 0x01, 0xE7, 0xC7 },
+{ 0x01, 0xE8, 0xC8 },
+{ 0x01, 0xE9, 0xC9 },
+{ 0x01, 0xEA, 0xCA },
+{ 0x01, 0xEB, 0xCB },
+{ 0x01, 0xEC, 0xCC },
+{ 0x01, 0xED, 0xCD },
+{ 0x01, 0xEE, 0xCE },
+{ 0x01, 0xEF, 0xCF },
+{ 0x01, 0xF0, 0xD0 },
+{ 0x01, 0xF1, 0xD1 },
+{ 0x01, 0xF2, 0xD2 },
+{ 0x01, 0xF3, 0xD3 },
+{ 0x01, 0xF4, 0xD4 },
+{ 0x01, 0xF5, 0xD5 },
+{ 0x01, 0xF6, 0xD6 },
+{ 0x00, 0xD7, 0xD7 },
+{ 0x01, 0xF8, 0xD8 },
+{ 0x01, 0xF9, 0xD9 },
+{ 0x01, 0xFA, 0xDA },
+{ 0x01, 0xFB, 0xDB },
+{ 0x01, 0xFC, 0xDC },
+{ 0x01, 0xFD, 0xDD },
+{ 0x01, 0xFE, 0xDE },
+{ 0x00, 0xDF, 0xDF },
+{ 0x00, 0xE0, 0xC0 },
+{ 0x00, 0xE1, 0xC1 },
+{ 0x00, 0xE2, 0xC2 },
+{ 0x00, 0xE3, 0xC3 },
+{ 0x00, 0xE4, 0xC4 },
+{ 0x00, 0xE5, 0xC5 },
+{ 0x00, 0xE6, 0xC6 },
+{ 0x00, 0xE7, 0xC7 },
+{ 0x00, 0xE8, 0xC8 },
+{ 0x00, 0xE9, 0xC9 },
+{ 0x00, 0xEA, 0xCA },
+{ 0x00, 0xEB, 0xCB },
+{ 0x00, 0xEC, 0xCC },
+{ 0x00, 0xED, 0xCD },
+{ 0x00, 0xEE, 0xCE },
+{ 0x00, 0xEF, 0xCF },
+{ 0x00, 0xF0, 0xD0 },
+{ 0x00, 0xF1, 0xD1 },
+{ 0x00, 0xF2, 0xD2 },
+{ 0x00, 0xF3, 0xD3 },
+{ 0x00, 0xF4, 0xD4 },
+{ 0x00, 0xF5, 0xD5 },
+{ 0x00, 0xF6, 0xD6 },
+{ 0x00, 0xF7, 0xF7 },
+{ 0x00, 0xF8, 0xD8 },
+{ 0x00, 0xF9, 0xD9 },
+{ 0x00, 0xFA, 0xDA },
+{ 0x00, 0xFB, 0xDB },
+{ 0x00, 0xFC, 0xDC },
+{ 0x00, 0xFD, 0xDD },
+{ 0x00, 0xFE, 0xDE },
+{ 0x00, 0xFF, 0xFF },
+};
+
+
 struct cs_info iso14_tbl[] = {
 { 0x00, 0x00, 0x00 },
 { 0x00, 0x01, 0x01 },
@@ -3547,6 +4385,264 @@ struct cs_info iso14_tbl[] = {
 { 0x00, 0xff, 0xff },
 };
 
+struct cs_info iso15_tbl[] = {
+{ 0x00, 0x00, 0x00 },
+{ 0x00, 0x01, 0x01 },
+{ 0x00, 0x02, 0x02 },
+{ 0x00, 0x03, 0x03 },
+{ 0x00, 0x04, 0x04 },
+{ 0x00, 0x05, 0x05 },
+{ 0x00, 0x06, 0x06 },
+{ 0x00, 0x07, 0x07 },
+{ 0x00, 0x08, 0x08 },
+{ 0x00, 0x09, 0x09 },
+{ 0x00, 0x0a, 0x0a },
+{ 0x00, 0x0b, 0x0b },
+{ 0x00, 0x0c, 0x0c },
+{ 0x00, 0x0d, 0x0d },
+{ 0x00, 0x0e, 0x0e },
+{ 0x00, 0x0f, 0x0f },
+{ 0x00, 0x10, 0x10 },
+{ 0x00, 0x11, 0x11 },
+{ 0x00, 0x12, 0x12 },
+{ 0x00, 0x13, 0x13 },
+{ 0x00, 0x14, 0x14 },
+{ 0x00, 0x15, 0x15 },
+{ 0x00, 0x16, 0x16 },
+{ 0x00, 0x17, 0x17 },
+{ 0x00, 0x18, 0x18 },
+{ 0x00, 0x19, 0x19 },
+{ 0x00, 0x1a, 0x1a },
+{ 0x00, 0x1b, 0x1b },
+{ 0x00, 0x1c, 0x1c },
+{ 0x00, 0x1d, 0x1d },
+{ 0x00, 0x1e, 0x1e },
+{ 0x00, 0x1f, 0x1f },
+{ 0x00, 0x20, 0x20 },
+{ 0x00, 0x21, 0x21 },
+{ 0x00, 0x22, 0x22 },
+{ 0x00, 0x23, 0x23 },
+{ 0x00, 0x24, 0x24 },
+{ 0x00, 0x25, 0x25 },
+{ 0x00, 0x26, 0x26 },
+{ 0x00, 0x27, 0x27 },
+{ 0x00, 0x28, 0x28 },
+{ 0x00, 0x29, 0x29 },
+{ 0x00, 0x2a, 0x2a },
+{ 0x00, 0x2b, 0x2b },
+{ 0x00, 0x2c, 0x2c },
+{ 0x00, 0x2d, 0x2d },
+{ 0x00, 0x2e, 0x2e },
+{ 0x00, 0x2f, 0x2f },
+{ 0x00, 0x30, 0x30 },
+{ 0x00, 0x31, 0x31 },
+{ 0x00, 0x32, 0x32 },
+{ 0x00, 0x33, 0x33 },
+{ 0x00, 0x34, 0x34 },
+{ 0x00, 0x35, 0x35 },
+{ 0x00, 0x36, 0x36 },
+{ 0x00, 0x37, 0x37 },
+{ 0x00, 0x38, 0x38 },
+{ 0x00, 0x39, 0x39 },
+{ 0x00, 0x3a, 0x3a },
+{ 0x00, 0x3b, 0x3b },
+{ 0x00, 0x3c, 0x3c },
+{ 0x00, 0x3d, 0x3d },
+{ 0x00, 0x3e, 0x3e },
+{ 0x00, 0x3f, 0x3f },
+{ 0x00, 0x40, 0x40 },
+{ 0x01, 0x61, 0x41 },
+{ 0x01, 0x62, 0x42 },
+{ 0x01, 0x63, 0x43 },
+{ 0x01, 0x64, 0x44 },
+{ 0x01, 0x65, 0x45 },
+{ 0x01, 0x66, 0x46 },
+{ 0x01, 0x67, 0x47 },
+{ 0x01, 0x68, 0x48 },
+{ 0x01, 0x69, 0x49 },
+{ 0x01, 0x6a, 0x4a },
+{ 0x01, 0x6b, 0x4b },
+{ 0x01, 0x6c, 0x4c },
+{ 0x01, 0x6d, 0x4d },
+{ 0x01, 0x6e, 0x4e },
+{ 0x01, 0x6f, 0x4f },
+{ 0x01, 0x70, 0x50 },
+{ 0x01, 0x71, 0x51 },
+{ 0x01, 0x72, 0x52 },
+{ 0x01, 0x73, 0x53 },
+{ 0x01, 0x74, 0x54 },
+{ 0x01, 0x75, 0x55 },
+{ 0x01, 0x76, 0x56 },
+{ 0x01, 0x77, 0x57 },
+{ 0x01, 0x78, 0x58 },
+{ 0x01, 0x79, 0x59 },
+{ 0x01, 0x7a, 0x5a },
+{ 0x00, 0x5b, 0x5b },
+{ 0x00, 0x5c, 0x5c },
+{ 0x00, 0x5d, 0x5d },
+{ 0x00, 0x5e, 0x5e },
+{ 0x00, 0x5f, 0x5f },
+{ 0x00, 0x60, 0x60 },
+{ 0x00, 0x61, 0x41 },
+{ 0x00, 0x62, 0x42 },
+{ 0x00, 0x63, 0x43 },
+{ 0x00, 0x64, 0x44 },
+{ 0x00, 0x65, 0x45 },
+{ 0x00, 0x66, 0x46 },
+{ 0x00, 0x67, 0x47 },
+{ 0x00, 0x68, 0x48 },
+{ 0x00, 0x69, 0x49 },
+{ 0x00, 0x6a, 0x4a },
+{ 0x00, 0x6b, 0x4b },
+{ 0x00, 0x6c, 0x4c },
+{ 0x00, 0x6d, 0x4d },
+{ 0x00, 0x6e, 0x4e },
+{ 0x00, 0x6f, 0x4f },
+{ 0x00, 0x70, 0x50 },
+{ 0x00, 0x71, 0x51 },
+{ 0x00, 0x72, 0x52 },
+{ 0x00, 0x73, 0x53 },
+{ 0x00, 0x74, 0x54 },
+{ 0x00, 0x75, 0x55 },
+{ 0x00, 0x76, 0x56 },
+{ 0x00, 0x77, 0x57 },
+{ 0x00, 0x78, 0x58 },
+{ 0x00, 0x79, 0x59 },
+{ 0x00, 0x7a, 0x5a },
+{ 0x00, 0x7b, 0x7b },
+{ 0x00, 0x7c, 0x7c },
+{ 0x00, 0x7d, 0x7d },
+{ 0x00, 0x7e, 0x7e },
+{ 0x00, 0x7f, 0x7f },
+{ 0x00, 0x80, 0x80 },
+{ 0x00, 0x81, 0x81 },
+{ 0x00, 0x82, 0x82 },
+{ 0x00, 0x83, 0x83 },
+{ 0x00, 0x84, 0x84 },
+{ 0x00, 0x85, 0x85 },
+{ 0x00, 0x86, 0x86 },
+{ 0x00, 0x87, 0x87 },
+{ 0x00, 0x88, 0x88 },
+{ 0x00, 0x89, 0x89 },
+{ 0x00, 0x8a, 0x8a },
+{ 0x00, 0x8b, 0x8b },
+{ 0x00, 0x8c, 0x8c },
+{ 0x00, 0x8d, 0x8d },
+{ 0x00, 0x8e, 0x8e },
+{ 0x00, 0x8f, 0x8f },
+{ 0x00, 0x90, 0x90 },
+{ 0x00, 0x91, 0x91 },
+{ 0x00, 0x92, 0x92 },
+{ 0x00, 0x93, 0x93 },
+{ 0x00, 0x94, 0x94 },
+{ 0x00, 0x95, 0x95 },
+{ 0x00, 0x96, 0x96 },
+{ 0x00, 0x97, 0x97 },
+{ 0x00, 0x98, 0x98 },
+{ 0x00, 0x99, 0x99 },
+{ 0x00, 0x9a, 0x9a },
+{ 0x00, 0x9b, 0x9b },
+{ 0x00, 0x9c, 0x9c },
+{ 0x00, 0x9d, 0x9d },
+{ 0x00, 0x9e, 0x9e },
+{ 0x00, 0x9f, 0x9f },
+{ 0x00, 0xa0, 0xa0 },
+{ 0x00, 0xa1, 0xa1 },
+{ 0x00, 0xa2, 0xa2 },
+{ 0x00, 0xa3, 0xa3 },
+{ 0x00, 0xa4, 0xa4 },
+{ 0x00, 0xa5, 0xa5 },
+{ 0x01, 0xa8, 0xa6 },
+{ 0x00, 0xa7, 0xa7 },
+{ 0x00, 0xa8, 0xa6 },
+{ 0x00, 0xa9, 0xa9 },
+{ 0x00, 0xaa, 0xaa },
+{ 0x00, 0xab, 0xab },
+{ 0x00, 0xac, 0xac },
+{ 0x00, 0xad, 0xad },
+{ 0x00, 0xae, 0xae },
+{ 0x00, 0xaf, 0xaf },
+{ 0x00, 0xb0, 0xb0 },
+{ 0x00, 0xb1, 0xb1 },
+{ 0x00, 0xb2, 0xb2 },
+{ 0x00, 0xb3, 0xb3 },
+{ 0x01, 0xb8, 0xb4 },
+{ 0x00, 0xb5, 0xb5 },
+{ 0x00, 0xb6, 0xb6 },
+{ 0x00, 0xb7, 0xb7 },
+{ 0x00, 0xb8, 0xb4 },
+{ 0x00, 0xb9, 0xb9 },
+{ 0x00, 0xba, 0xba },
+{ 0x00, 0xbb, 0xbb },
+{ 0x01, 0xbd, 0xbc },
+{ 0x00, 0xbd, 0xbc },
+{ 0x01, 0xff, 0xbe },
+{ 0x00, 0xbf, 0xbf },
+{ 0x01, 0xe0, 0xc0 },
+{ 0x01, 0xe1, 0xc1 },
+{ 0x01, 0xe2, 0xc2 },
+{ 0x01, 0xe3, 0xc3 },
+{ 0x01, 0xe4, 0xc4 },
+{ 0x01, 0xe5, 0xc5 },
+{ 0x01, 0xe6, 0xc6 },
+{ 0x01, 0xe7, 0xc7 },
+{ 0x01, 0xe8, 0xc8 },
+{ 0x01, 0xe9, 0xc9 },
+{ 0x01, 0xea, 0xca },
+{ 0x01, 0xeb, 0xcb },
+{ 0x01, 0xec, 0xcc },
+{ 0x01, 0xed, 0xcd },
+{ 0x01, 0xee, 0xce },
+{ 0x01, 0xef, 0xcf },
+{ 0x01, 0xf0, 0xd0 },
+{ 0x01, 0xf1, 0xd1 },
+{ 0x01, 0xf2, 0xd2 },
+{ 0x01, 0xf3, 0xd3 },
+{ 0x01, 0xf4, 0xd4 },
+{ 0x01, 0xf5, 0xd5 },
+{ 0x01, 0xf6, 0xd6 },
+{ 0x00, 0xd7, 0xd7 },
+{ 0x01, 0xf8, 0xd8 },
+{ 0x01, 0xf9, 0xd9 },
+{ 0x01, 0xfa, 0xda },
+{ 0x01, 0xfb, 0xdb },
+{ 0x01, 0xfc, 0xdc },
+{ 0x01, 0xfd, 0xdd },
+{ 0x01, 0xfe, 0xde },
+{ 0x00, 0xdf, 0xdf },
+{ 0x00, 0xe0, 0xc0 },
+{ 0x00, 0xe1, 0xc1 },
+{ 0x00, 0xe2, 0xc2 },
+{ 0x00, 0xe3, 0xc3 },
+{ 0x00, 0xe4, 0xc4 },
+{ 0x00, 0xe5, 0xc5 },
+{ 0x00, 0xe6, 0xc6 },
+{ 0x00, 0xe7, 0xc7 },
+{ 0x00, 0xe8, 0xc8 },
+{ 0x00, 0xe9, 0xc9 },
+{ 0x00, 0xea, 0xca },
+{ 0x00, 0xeb, 0xcb },
+{ 0x00, 0xec, 0xcc },
+{ 0x00, 0xed, 0xcd },
+{ 0x00, 0xee, 0xce },
+{ 0x00, 0xef, 0xcf },
+{ 0x00, 0xf0, 0xd0 },
+{ 0x00, 0xf1, 0xd1 },
+{ 0x00, 0xf2, 0xd2 },
+{ 0x00, 0xf3, 0xd3 },
+{ 0x00, 0xf4, 0xd4 },
+{ 0x00, 0xf5, 0xd5 },
+{ 0x00, 0xf6, 0xd6 },
+{ 0x00, 0xf7, 0xf7 },
+{ 0x00, 0xf8, 0xd8 },
+{ 0x00, 0xf9, 0xd9 },
+{ 0x00, 0xfa, 0xda },
+{ 0x00, 0xfb, 0xdb },
+{ 0x00, 0xfc, 0xdc },
+{ 0x00, 0xfd, 0xdd },
+{ 0x00, 0xfe, 0xde },
+{ 0x00, 0xff, 0xbe },
+};
 
 struct cs_info iscii_devanagari_tbl[] = {
 { 0x00, 0x00, 0x00 },
@@ -3807,8 +4903,6 @@ struct cs_info iscii_devanagari_tbl[] = {
 { 0x00, 0xff, 0xff },
 };
 
-
-
 struct enc_entry encds[] = {
 {"ISO8859-1",iso1_tbl},
 {"ISO8859-2",iso2_tbl},
@@ -3821,8 +4915,11 @@ struct enc_entry encds[] = {
 {"ISO8859-9",iso9_tbl},
 {"ISO8859-10",iso10_tbl},
 {"KOI8-R",koi8r_tbl},
-{"CP-1251",cp1251_tbl},
+{"KOI8-U",koi8u_tbl},
+{"microsoft-cp1251",cp1251_tbl},
+{"ISO8859-13", iso13_tbl},
 {"ISO8859-14", iso14_tbl},
+{"ISO8859-15", iso15_tbl},
 {"ISCII-DEVANAGARI", iscii_devanagari_tbl},
 };
 
@@ -3836,28 +4933,41 @@ struct cs_info * get_current_cs(const char * es) {
     }
   }
   return ccs;
-}
+};
 
+// Accessor for the utf_lst table defined elsewhere in this file.
+// (The stray ';' that followed the function body has been dropped.)
+struct unicode_info * get_utf_cs() {
+  return utf_lst;
+}
 
+// Returns UTF_LST_LEN.
+// NOTE(review): presumably the number of entries in the table returned
+// by get_utf_cs() - confirm against the definition of utf_lst.
+// (The stray ';' that followed the function body has been dropped.)
+int get_utf_cs_len() {
+  return UTF_LST_LEN;
+}
 
+// Per-language defaults. Each entry holds a two-letter language code,
+// an encoding name and a LANG_* constant. get_lang_num() compares the
+// first two characters of its argument with the language code and
+// returns the constant of the first entry that matches.
 struct lang_map lang2enc[] = {
-  {"ca","ISO8859-1"},
-  {"cs","ISO8859-2"},
-  {"da","ISO8859-1"},
-  {"de","ISO8859-1"},
-  {"el","ISO8859-7"},
-  {"en","ISO8859-1"},
-  {"es","ISO8859-1"},
-  {"fr","ISO8859-1"},
-  {"hr","ISO8859-2"},
-  {"hu","ISO8859-2"},
-  {"it","ISO8859-1"},
-  {"la","ISO8859-1"},
-  {"nl","ISO8859-1"},
-  {"pl","ISO8859-2"},
-  {"pt","ISO8859-1"},
-  {"sv","ISO8859-1"},
-  {"ru","KOI8-R"},
+{"az", "UTF-8", LANG_az},
+{"bg", "microsoft-cp1251", LANG_bg},
+{"ca", "ISO8859-1", LANG_ca},
+{"cs", "ISO8859-2", LANG_cs},
+{"da", "ISO8859-1", LANG_da},
+{"de", "ISO8859-1", LANG_de},
+{"el", "ISO8859-7", LANG_el},
+{"en", "ISO8859-1", LANG_en},
+{"es", "ISO8859-1", LANG_es},
+{"eu", "ISO8859-1", LANG_eu},
+{"gl", "ISO8859-1", LANG_gl},
+{"fr", "ISO8859-15", LANG_fr},
+{"hr", "ISO8859-2", LANG_hr},
+{"hu", "ISO8859-2", LANG_hu},
+{"it", "ISO8859-1", LANG_it},
+{"la", "ISO8859-1", LANG_la},
+{"lv", "ISO8859-13", LANG_lv},
+{"nl", "ISO8859-1", LANG_nl},
+{"pl", "ISO8859-2", LANG_pl},
+{"pt", "ISO8859-1", LANG_pt},
+{"sv", "ISO8859-1", LANG_sv},
+{"tr", "UTF-8", LANG_tr},
+{"ru", "KOI8-R", LANG_ru},
+{"uk", "KOI8-U", LANG_uk}
 };
 
 
@@ -3869,5 +4979,14 @@ const char * get_default_enc(const char * lang) {
     }
   }
   return NULL;
-}
+};
 
+// Map a language code to its number: the first two characters of `lang`
+// are compared with each lang2enc entry; LANG_xx when nothing matches.
+int get_lang_num(const char * lang)
+{
+  const struct lang_map * entry = lang2enc;
+  const struct lang_map * const stop = lang2enc + sizeof(lang2enc) / sizeof(lang2enc[0]);
+  while (entry != stop) {
+    if (strncmp(lang, entry->lang, 2) == 0) return entry->num;
+    ++entry;
+  }
+  return LANG_xx;
+}
diff --git a/src/myspell/csutil.hxx b/src/myspell/csutil.hxx
index 037eab9..aa50a58 100644
--- a/src/myspell/csutil.hxx
+++ b/src/myspell/csutil.hxx
@@ -1,36 +1,88 @@
 #ifndef __CSUTILHXX__
 #define __CSUTILHXX__
 
-
 // First some base level utility routines
 
+// two-byte character unit used by the UTF-16 helpers declared below
+// (l / h are presumably the low and high byte -- TODO confirm in csutil.cxx)
+typedef struct {
+    unsigned char l;
+    unsigned char h;
+} w_char;
+
+// convert UTF-16 characters to UTF-8
+char * u16_u8(char * dest, int size, const w_char * src, int srclen);
+
+// convert UTF-8 characters to UTF-16
+int u8_u16(w_char * dest, int size, const char * src);
+
+// sort 2-byte vector
+void flag_qsort(unsigned short flags[], int begin, int end);
+
+// binary search in 2-byte vector
+int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
+
 // remove end of line char(s)
 void   mychomp(char * s);
 
-// duplicate string                          
+// duplicate string
 char * mystrdup(const char * s);
 
-// duplicate reverse of string                   
+// duplicate reverse of string
 char * myrevstrdup(const char * s);
 
-// parse into tokens with char delimiter                
+// parse into tokens with char delimiter
 char * mystrsep(char ** sptr, const char delim);
+// parse into tokens with char delimiter
+char * mystrsep2(char ** sptr, const char delim);
+
+// string replace helper; presumably (text, pattern, replacement) -- TODO confirm against csutil.cxx
+char * mystrrep(char *, const char *, const char *);
 
-// is one string a leading subset of another   
+// is one string a leading subset of another
 int    isSubset(const char * s1, const char * s2);
 
 // is one reverse string a leading subset of the end of another   
-int    isRevSubset(const char * s1, const char * end_of_s2, int s2_len);
+int    isRevSubset(const char * s1, const char * s2, int len);
 
+// append s to ends of every lines in text
+void strlinecat(char * lines, const char * s);
 
-// character encoding information
+// tokenize into lines with new line
+   int line_tok(const char * text, char *** lines);
+
+// tokenize into lines with new line and uniq in place
+   char * line_uniq(char * text);
+
+// change \n to c in place
+   char * line_join(char * text, char c);
 
+// leave only last {[^}]*} pattern in string
+   char * delete_zeros(char * morphout);
+
+// reverse word
+   void reverseword(char *);
+
+// reverse word
+   void reverseword_utf(char *);
+
+// character encoding information
 struct cs_info {
   unsigned char ccase;
   unsigned char clower;
   unsigned char cupper;
 };
 
+// Unicode character encoding information
+// One entry holds three 16-bit values: c, cupper and clower.
+// NOTE(review): read off the field names, c looks like the character code
+// and cupper / clower its upper- and lower-case forms -- confirm against utf_lst.
+struct unicode_info {
+  unsigned short c;
+  unsigned short cupper;
+  unsigned short clower;
+};
+
+// Per-character record taken by mkallsmall_utf (declared later in this header).
+// NOTE(review): cletter is presumably a "is a letter" marker and
+// cupper / clower the case mappings -- confirm against the table builder.
+struct unicode_info2 {
+  char cletter;
+  unsigned short cupper;
+  unsigned short clower;
+};
 
 struct enc_entry {
   const char * enc_name;
@@ -42,13 +94,20 @@ struct enc_entry {
 struct lang_map {
   const char * lang;
   const char * def_enc;
+  int num;
 };
 
 struct cs_info * get_current_cs(const char * es);
 
+struct unicode_info * get_utf_cs();
+
+int get_utf_cs_len();
+
 const char * get_default_enc(const char * lang);
 
-// convert null terminated string to all caps using encoding 
+int get_lang_num(const char * lang);
+
+// convert null terminated string to all caps using encoding
 void enmkallcap(char * d, const char * p, const char * encoding);
 
 // convert null terminated string to all little using encoding
@@ -57,7 +116,7 @@ void enmkallsmall(char * d, const char * p, const char * encoding);
 // convert null terminated string to have intial capital using encoding
 void enmkinitcap(char * d, const char * p, const char * encoding);
 
-// convert null terminated string to all caps 
+// convert null terminated string to all caps
 void mkallcap(char * p, const struct cs_info * csconv);
 
 // convert null terminated string to all little
@@ -66,5 +125,7 @@ void mkallsmall(char * p, const struct cs_info * csconv);
 // convert null terminated string to have intial capital
 void mkinitcap(char * p, const struct cs_info * csconv);
 
+// convert first nc characters of UTF-8 string to little
+void mkallsmall_utf(w_char * u, int nc, struct unicode_info2 * utfconv);
 
 #endif
diff --git a/src/myspell/enchant_myspell.hxx b/src/myspell/enchant_myspell.hxx
deleted file mode 100644
index 0c18549..0000000
--- a/src/myspell/enchant_myspell.hxx
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef _MYSPELLMGR_HXX_
-#define _MYSPELLMGR_HXX_
-
-#include "hashmgr.hxx"
-#include "affixmgr.hxx"
-#include "suggestmgr.hxx"
-#include "csutil.hxx"
-
-#define NOCAP   0
-#define INITCAP 1
-#define ALLCAP  2
-#define HUHCAP  3
-
-#ifdef WINDOWS
-#define DLLSUPPORT __declspec(dllexport)
-#else
-#define DLLSUPPORT
-#endif
-
-class DLLSUPPORT MySpell
-{
-  AffixMgr*       pAMgr;
-  HashMgr*        pHMgr;
-  SuggestMgr*     pSMgr;
-  char *          encoding;
-  struct cs_info * csconv;
-  int             maxSug;
-
-public:
-  MySpell(const char * affpath, const char * dpath);
-  ~MySpell();
-
-  int suggest(char*** slst, const char * word);
-  int spell(const char *);
-  char * get_dic_encoding();
-
-private:
-   int    cleanword(char *, const char *, int *, int *);
-   char * check(const char *);
-};
-
-#endif
diff --git a/src/myspell/hashmgr.cxx b/src/myspell/hashmgr.cxx
index d7b4ec8..29a05c3 100644
--- a/src/myspell/hashmgr.cxx
+++ b/src/myspell/hashmgr.cxx
@@ -1,25 +1,36 @@
-#include "license.readme"
+#include "license.hunspell"
+#include "license.myspell"
 
 #include <cstdlib>
 #include <cstring>
+#include <cctype>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
 #include <cstdio>
 
 #include "hashmgr.hxx"
+#include "csutil.hxx"
 
-extern void mychomp(char * s);
-extern char * mystrdup(const char *);
-
-#ifndef WINDOWS
+#ifndef W32
+#include <unistd.h>
 using namespace std;
 #endif
 
-
 // build a hash table from a munched word list
 
-HashMgr::HashMgr(const char * tpath)
+HashMgr::HashMgr(const char * tpath, const char * apath)
 {
   tablesize = 0;
   tableptr = NULL;
+  flag_mode = FLAG_CHAR;
+  complexprefixes = 0;
+  utf8 = 0;
+  numaliasf = 0;
+  aliasf = NULL;
+  numaliasm = 0;
+  aliasm = NULL;
+  load_config(apath);  
   int ec = load_tables(tpath);
   if (ec) {
     /* error condition - what should we do here */
@@ -42,14 +53,17 @@ HashMgr::~HashMgr()
       struct hentry * pt = &tableptr[i];
       struct hentry * nt = NULL;
       if (pt) {
+        if (pt->astr && !aliasf) free(pt->astr);
 	if (pt->word) free(pt->word);
-        if (pt->astr) free(pt->astr);
+        if (pt->description && !aliasm) free(pt->description);
+
         pt = pt->next;
       }
       while(pt) {
         nt = pt->next;
+        if (pt->astr && !aliasf) free(pt->astr);
 	if (pt->word) free(pt->word);
-        if (pt->astr) free(pt->astr);
+        if (pt->description && !aliasm) free(pt->description);
         free(pt);
 	pt = nt;
       }
@@ -57,9 +71,22 @@ HashMgr::~HashMgr()
     free(tableptr);
   }
   tablesize = 0;
-}
-
 
+  if (aliasf) {
+    for (int j = 0; j < (numaliasf); j++) free(aliasf[j]);
+    free(aliasf);
+    aliasf = NULL;
+    if (aliasflen) {
+      free(aliasflen);
+      aliasflen = NULL;
+    }
+  }
+  if (aliasm) {
+    for (int j = 0; j < (numaliasm); j++) free(aliasm[j]);
+    free(aliasm);
+    aliasm = NULL;
+  }  
+}
 
 // lookup a root word in the hashtable
 
@@ -76,40 +103,87 @@ struct hentry * HashMgr::lookup(const char *word) const
     return NULL;
 }
 
-
-
 // add a word to the hash table (private)
 
-int HashMgr::add_word(const char * word, int wl, const char * aff, int al)
+// Stores a copy of `word` (reversed when complexprefixes is set) in the hash
+// table together with the flag vector `aff` (al entries, stored by pointer,
+// not copied) and the optional morphological description `desc`.
+// Returns 0 on success, 1 when an allocation fails.
+int HashMgr::add_word(const char * word, int wl, unsigned short * aff, int al, const char * desc)
 {
-    int i = hash(word);
+    char * st = mystrdup(word);
+    if (wl && !st) return 1;
+    if (complexprefixes) {
+        if (utf8) reverseword_utf(st); else reverseword(st);
+    }
+    int i = hash(st);
     struct hentry * dp = &tableptr[i];
-    struct hentry* hp;
     if (dp->word == NULL) {
        dp->wlen = wl;
        dp->alen = al;
-       dp->word = mystrdup(word);
-       dp->astr = mystrdup(aff);
+       dp->word = st;
+       dp->astr = aff;
        dp->next = NULL;
-       if ((wl) && (dp->word == NULL)) return 1;
-       if ((al) && (dp->astr == NULL)) return 1;
+       dp->next_homonym = NULL;
+       if (aliasm) {
+            dp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
+       } else {
+            dp->description = mystrdup(desc);
+            if (desc && !dp->description) return 1;
+            if (dp->description && complexprefixes) {
+                if (utf8) reverseword_utf(dp->description); else reverseword(dp->description);
+            }
+       }
     } else {
-       hp = (struct hentry *) malloc (sizeof(struct hentry));
-       if (hp == NULL) return 1;
+       struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry));
+       if (!hp) {
+            free(st); // the word copy is not referenced by the table yet
+            return 1;
+       }
        hp->wlen = wl;
        hp->alen = al;
-       hp->word = mystrdup(word);
-       hp->astr = mystrdup(aff);
+       hp->word = st;
+       hp->astr = aff;
        hp->next = NULL;      
-       while (dp->next != NULL) dp=dp->next; 
+       hp->next_homonym = NULL;
+       if (aliasm) {
+            hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
+       } else {
+            hp->description = mystrdup(desc);
+            if (desc && !hp->description) {
+                // hp has not been linked into the chain: release it and the word
+                free(hp);
+                free(st);
+                return 1;
+            }
+            // test the NEW entry's description (not the bucket head's) before reversing it
+            if (hp->description && complexprefixes) {
+                if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);
+            }
+       }
+       // append at the end of the chain; the first entry with the same word
+       // and no homonym link yet is pointed at the new entry
+       while (dp->next != NULL) {
+         if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
+         dp=dp->next;
+       }
+       if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) dp->next_homonym = hp;
        dp->next = hp;
-       if ((wl) && (hp->word == NULL)) return 1;
-       if ((al) && (hp->astr == NULL)) return 1;
     }
     return 0;
 }     
 
+// add a custom dic. word to the hash table (public)
 
+// Decodes the optional affix flag string `aff`, sorts the resulting flag
+// vector and stores `word` (length wl) with it in the hash table.
+// Returns 0 on success, 1 when the word could not be stored.
+int HashMgr::put_word(const char * word, int wl, char * aff)
+{
+    unsigned short * flags = NULL;
+    int al = 0;
+    if (aff) {
+        al = decode_flags(&flags, aff);
+        flag_qsort(flags, 0, al);
+    }
+    // With a NULL description every failure return of add_word happens before
+    // the flag vector is stored, so it is still ours to release here.
+    if (add_word(word, wl, flags, al, NULL)) {
+        free(flags);
+        return 1;
+    }
+    return 0;
+}
+
+// Stores `word` (length wl) with a private copy of the flag vector of the
+// already known word `pattern`.
+// Returns 0 on success, 1 when `pattern` is unknown or has no flags, or when
+// an allocation / insertion fails.
+int HashMgr::put_word_pattern(const char * word, int wl, const char * pattern)
+{
+    struct hentry * dp = lookup(pattern);
+    if (!dp || !dp->astr) return 1;
+    const size_t nbytes = (size_t) dp->alen * sizeof(unsigned short);
+    // never request zero bytes: malloc(0) may legitimately return NULL
+    unsigned short * flags = (unsigned short *) malloc(nbytes ? nbytes : 1);
+    if (!flags) return 1;
+    memcpy(flags, dp->astr, nbytes);
+    // With a NULL description add_word fails only before storing the vector.
+    if (add_word(word, wl, flags, dp->alen, NULL)) {
+        free(flags);
+        return 1;
+    }
+    return 0;
+}
 
 // walk the hash table entry by entry - null at end
 struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
@@ -137,14 +211,13 @@ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const
   return hp;
 }
 
-
-
 // load a munched word list and build a hash table on the fly
-
 int HashMgr::load_tables(const char * tpath)
 {
   int wl, al;
   char * ap;
+  char * dp;
+  unsigned short * flags;
 
   // raw dictionary - munched file
   FILE * rawdict = fopen(tpath, "r");
@@ -154,39 +227,72 @@ int HashMgr::load_tables(const char * tpath)
   char ts[MAXDELEN];
   if (! fgets(ts, MAXDELEN-1,rawdict)) return 2;
   mychomp(ts);
+  if ((*ts < '1') || (*ts > '9')) fprintf(stderr, "error - missing word count in dictionary file\n");
   tablesize = atoi(ts);
   if (!tablesize) return 4; 
-  tablesize = tablesize + 5;
+  tablesize = tablesize + 5 + USERWORD;
   if ((tablesize %2) == 0) tablesize++;
 
   // allocate the hash table
   tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
   if (! tableptr) return 3;
+  for (int i=0; i<tablesize; i++) tableptr[i].word = NULL;
 
   // loop through all words on much list and add to hash
   // table and create word and affix strings
 
   while (fgets(ts,MAXDELEN-1,rawdict)) {
     mychomp(ts);
+    // split each line into word and morphological description
+    dp = strchr(ts,'\t');
+
+    if (dp) {
+      *dp = '\0';
+      dp++;
+    } else {
+      dp = NULL;
+    }
+
     // split each line into word and affix char strings
-    ap = strchr(ts,'/');
+    // "\/" signs slash in words (not affix separator)
+    // "/" at beginning of the line is word character (not affix separator)
+    ap = ts;
+    while (ap = strchr(ap,'/')) {
+        if (ap == ts) {
+            ap++;
+            continue;
+        } else if (*(ap - 1) != '\\') break;
+        // replace "\/" with "/"
+        for (char * sp = ap - 1; *sp; *sp = *(sp + 1), sp++);
+        
+    }
+
     if (ap) {
       *ap = '\0';
-      ap++;
-      al = strlen(ap);
+      if (aliasf) {
+        int index = atoi(ap + 1);
+        al = get_aliasf(index, &flags);
+        if (!al) {
+            fprintf(stderr, "error - bad flag vector alias: %s\n", ts);
+            *ap = '\0';
+        }
+      } else {
+        al = decode_flags(&flags, ap + 1);
+        flag_qsort(flags, 0, al);
+      }
     } else {
       al = 0;
       ap = NULL;
+      flags = NULL;
     }
 
     wl = strlen(ts);
 
     // add the word and its index
-    if (add_word(ts,wl,ap,al)) 
-      return 5;;
+    if (add_word(ts,wl,flags,al,dp)) return 5;
 
   }
-
+ 
   fclose(rawdict);
   return 0;
 }
@@ -207,3 +313,367 @@ int HashMgr::hash(const char * word) const
     return (unsigned long) hv % tablesize;
 }
 
+// Decodes a textual flag vector into a newly malloc'ed array of 16-bit flags,
+// interpreting the text according to flag_mode.
+//   result - receives the array (to be released with free()); set to NULL
+//            when the allocation fails
+//   flags  - NUL-terminated flag string
+// Returns the number of flags stored in *result (0 when allocation failed).
+int HashMgr::decode_flags(unsigned short ** result, char * flags) {
+    int len;
+    switch (flag_mode) {
+      case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
+        len = strlen(flags);
+        if (len%2 == 1) fprintf(stderr,"error: length of FLAG_LONG flagvector is odd: %s\n", flags);
+        len = len/2;
+        // never request zero bytes: malloc(0) may legitimately return NULL
+        *result = (unsigned short *) malloc((len ? len : 1) * sizeof(short));
+        if (!*result) return 0;
+        // read the bytes as unsigned char so that values >= 0x80 are not
+        // sign-extended into the high byte of the flag
+        const unsigned char * bytes = (const unsigned char *) flags;
+        for (int i = 0; i < len; i++) {
+            (*result)[i] = (unsigned short) ((bytes[i * 2] << 8) + bytes[i * 2 + 1]);
+        }
+        break;
+      }
+      case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 23 233)
+        len = 1;
+        char * src = flags;
+        unsigned short * dest;
+        char * p;
+        for (p = flags; *p; p++) {
+          if (*p == ',') len++;
+        }
+        *result = (unsigned short *) malloc(len * sizeof(short));
+        if (!*result) return 0;
+        dest = *result;
+        for (p = flags; *p; p++) {
+          if (*p == ',') {
+            *dest = (unsigned short) atoi(src);
+            if (*dest == 0) fprintf(stderr, "error: 0 is wrong flag id\n");
+            src = p + 1;
+            dest++;
+          }
+        }
+        *dest = (unsigned short) atoi(src);
+        if (*dest == 0) fprintf(stderr, "error: 0 is wrong flag id\n");
+        break;
+      }
+      case FLAG_UNI: { // UTF-8 characters
+        w_char w[MAXDELEN/2];
+        len = u8_u16(w, MAXDELEN/2, flags);
+        // NOTE(review): guards against a negative (error) return, should
+        // u8_u16 have one -- confirm in csutil.cxx
+        if (len < 0) len = 0;
+        *result = (unsigned short *) malloc((len ? len : 1) * sizeof(short));
+        if (!*result) return 0;
+        memcpy(*result, w, len * sizeof(short));
+        break;
+      }
+      default: { // Ispell's one-character flags (erfg -> e r f g)
+        unsigned short * dest;
+        len = strlen(flags);
+        *result = (unsigned short *) malloc((len ? len : 1) * sizeof(short));
+        if (!*result) return 0;
+        dest = *result;
+        for (unsigned char * p = (unsigned char *) flags; *p; p++) {
+          *dest = (unsigned short) *p;
+          dest++;
+        }
+      }
+    }
+    return len;
+}
+
+// Decodes a single textual flag into its 16-bit value according to
+// flag_mode; a flag that decodes to 0 is reported on stderr.
+unsigned short HashMgr::decode_flag(const char * f) {
+    unsigned short s = 0;
+    switch (flag_mode) {
+      case FLAG_LONG:
+        // unsigned char first: same byte handling as decode_flags, no sign extension
+        s = (unsigned short) ((((unsigned char) f[0]) << 8) + (unsigned char) f[1]);
+        break;
+      case FLAG_NUM:
+        s = (unsigned short) atoi(f);
+        break;
+      case FLAG_UNI:
+        u8_u16((w_char *) &s, 1, f);
+        break;
+      default:
+        s = (unsigned short) *((unsigned char *)f);
+    }
+    if (!s) fprintf(stderr, "error: 0 is wrong flag id\n");
+    return s;
+}
+
+// Builds the textual form of flag `f` for the active flag_mode and returns a
+// duplicate made with mystrdup; the value 0 is rendered as "(NULL)".
+char * HashMgr::encode_flag(unsigned short f) {
+    if (f == 0) return mystrdup("(NULL)");
+    unsigned char buf[10];
+    switch (flag_mode) {
+      case FLAG_LONG:
+        // high byte first, then low byte
+        buf[0] = (unsigned char) (f >> 8);
+        buf[1] = (unsigned char) (f & 0xff);
+        buf[2] = '\0';
+        break;
+      case FLAG_NUM:
+        sprintf((char *) buf, "%d", f);
+        break;
+      case FLAG_UNI:
+        u16_u8((char *) buf, 10, (w_char *) &f, 1);
+        break;
+      default:
+        buf[0] = (unsigned char) f;
+        buf[1] = '\0';
+        break;
+    }
+    return mystrdup((char *) buf);
+}
+
+// read in aff file and set flag mode
+//
+// Scans the affix file up to the first SFX/PFX line and records what the
+// dictionary loader needs: the FLAG mode, a UTF-8 SET line, the AF / AM alias
+// tables and COMPLEXPREFIXES.
+// Returns 0 on success, 1 when the file cannot be opened or an alias table
+// fails to parse.
+int  HashMgr::load_config(const char * affpath)
+{
+
+  // io buffers
+  char line[MAXDELEN+1];
+
+  // open the affix file
+  FILE * afflst;
+  afflst = fopen(affpath,"r");
+  if (!afflst) {
+    fprintf(stderr,"Error - could not open affix description file %s\n",affpath);
+    return 1;
+  }
+
+    int status = 0;
+
+    // read in each line ignoring any that do not
+    // start with a known line type indicator
+    // (isspace needs an unsigned char value, hence the casts)
+
+    while (fgets(line,MAXDELEN,afflst)) {
+        mychomp(line);
+
+        /* FLAG selects how flag vectors are written: long, num or UTF-8 */
+        if ((strncmp(line,"FLAG",4) == 0) && isspace((unsigned char) line[4])) {
+            if (flag_mode != FLAG_CHAR) {
+                fprintf(stderr,"error: duplicate FLAG parameter\n");
+            }
+            if (strstr(line, "long")) flag_mode = FLAG_LONG;
+            if (strstr(line, "num")) flag_mode = FLAG_NUM;
+            if (strstr(line, "UTF-8")) flag_mode = FLAG_UNI;
+            if (flag_mode == FLAG_CHAR) {
+                fprintf(stderr,"error: FLAG need `num', `long' or `UTF-8' parameter: %s\n", line);
+            }
+        }
+        if ((strncmp(line,"SET",3) == 0) && isspace((unsigned char) line[3]) && strstr(line, "UTF-8")) utf8 = 1;
+
+        /* alias tables; the parsers read their following lines from afflst */
+        if ((strncmp(line,"AF",2) == 0) && isspace((unsigned char) line[2])) {
+            if (parse_aliasf(line, afflst)) {
+                status = 1;
+                break;
+            }
+        }
+
+        if ((strncmp(line,"AM",2) == 0) && isspace((unsigned char) line[2])) {
+            if (parse_aliasm(line, afflst)) {
+                status = 1;
+                break;
+            }
+        }
+
+        if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
+        /* the settings of interest precede the affix rules: stop at the first one */
+        if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace((unsigned char) line[3])) break;
+    }
+    // single exit: the handle is closed on the error paths as well
+    fclose(afflst);
+    return status;
+}
+
+// release a (possibly partially filled) AF table and reset its bookkeeping;
+// unused slots must be NULL
+static void drop_aliasf(unsigned short ** & tbl, unsigned short * & lens, int & count)
+{
+   if (tbl) {
+      for (int k = 0; k < count; k++) free(tbl[k]);
+      free(tbl);
+   }
+   free(lens);
+   tbl = NULL;
+   lens = NULL;
+   count = 0;
+}
+
+/* parse in the ALIAS table */
+// `line` holds the "AF <count>" header; the <count> following "AF <flags>"
+// lines are read from `af` (re-using the `line` buffer).  Each flag vector is
+// decoded and sorted into aliasf[] / aliasflen[].
+// Returns 0 on success; on any error returns 1 with the table released and
+// numaliasf reset to 0.
+int  HashMgr::parse_aliasf(char * line, FILE * af)
+{
+   if (numaliasf != 0) {
+      fprintf(stderr,"error: duplicate AF (alias for flag vector) tables used\n");
+      return 1;
+   }
+   // aliasflen may still be uninitialised here; give the error paths below a
+   // well-defined value to free
+   aliasflen = NULL;
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+       if (*piece != '\0') {
+          switch(i) {
+             case 0: { np++; break; }
+             case 1: {
+                       numaliasf = atoi(piece);
+                       if (numaliasf < 1) {
+                          numaliasf = 0;
+                          aliasf = NULL;
+                          fprintf(stderr,"incorrect number of entries in AF table\n");
+                          free(piece);
+                          return 1;
+                       }
+                       // zero-filled so that a partially read table can be released safely
+                       aliasf = (unsigned short **) calloc(numaliasf, sizeof(unsigned short *));
+                       aliasflen = (unsigned short *) calloc(numaliasf, sizeof(unsigned short));
+                       if (!aliasf || !aliasflen) {
+                          drop_aliasf(aliasf, aliasflen, numaliasf);
+                          free(piece);
+                          return 1;
+                       }
+                       np++;
+                       break;
+                     }
+             default: break;
+          }
+          i++;
+       }
+       free(piece);
+   }
+   if (np != 2) {
+      drop_aliasf(aliasf, aliasflen, numaliasf);
+      fprintf(stderr,"error: missing AF table information\n");
+      return 1;
+   }
+
+   /* now parse the numaliasf lines to read in the remainder of the table */
+   char * nl = line;
+   for (int j=0; j < numaliasf; j++) {
+        if (!fgets(nl,MAXDELEN,af)) {
+            // truncated table: do not leave a half-filled one behind
+            drop_aliasf(aliasf, aliasflen, numaliasf);
+            fprintf(stderr,"error: AF table is corrupt\n");
+            return 1;
+        }
+        mychomp(nl);
+        tp = nl;
+        i = 0;
+        while ((piece=mystrsep(&tp, 0))) {
+           if (*piece != '\0') {
+               switch(i) {
+                  case 0: {
+                             if (strncmp(piece,"AF",2) != 0) {
+                                 // also releases the vectors decoded so far
+                                 drop_aliasf(aliasf, aliasflen, numaliasf);
+                                 fprintf(stderr,"error: AF table is corrupt\n");
+                                 free(piece);
+                                 return 1;
+                             }
+                             break;
+                          }
+                  case 1: {
+                            aliasflen[j] = decode_flags(&(aliasf[j]), piece);
+                            if (aliasf[j]) flag_qsort(aliasf[j], 0, aliasflen[j]);
+                            break;
+                          }
+                  default: break;
+               }
+               i++;
+           }
+           free(piece);
+        }
+        // a row without a flag vector (or a failed decode) invalidates the table
+        if (!aliasf[j]) {
+             drop_aliasf(aliasf, aliasflen, numaliasf);
+             fprintf(stderr,"error: AF table is corrupt\n");
+             return 1;
+        }
+   }
+   return 0;
+}
+
+// release a (possibly partially filled) AM table and reset its bookkeeping;
+// unused slots must be NULL
+static void drop_aliasm(char ** & tbl, int & count)
+{
+   if (tbl) {
+      for (int k = 0; k < count; k++) free(tbl[k]);
+      free(tbl);
+   }
+   tbl = NULL;
+   count = 0;
+}
+
+/* parse morph alias definitions */
+// `line` holds the "AM <count>" header; the <count> following "AM <text>"
+// lines are read from `af` (re-using the `line` buffer).  Each description is
+// duplicated into aliasm[], reversed first when complexprefixes is set.
+// Returns 0 on success; on any error returns 1 with the table released and
+// numaliasm reset to 0.
+int  HashMgr::parse_aliasm(char * line, FILE * af)
+{
+   if (numaliasm != 0) {
+      fprintf(stderr,"error: duplicate AM (aliases for morphological descriptions) tables used\n");
+      return 1;
+   }
+   char * tp = line;
+   char * piece;
+   int i = 0;
+   int np = 0;
+   while ((piece=mystrsep(&tp, 0))) {
+       if (*piece != '\0') {
+          switch(i) {
+             case 0: { np++; break; }
+             case 1: {
+                       numaliasm = atoi(piece);
+                       if (numaliasm < 1) {
+                          numaliasm = 0;
+                          fprintf(stderr,"incorrect number of entries in AM table\n");
+                          free(piece);
+                          return 1;
+                       }
+                       // zero-filled so that a partially read table can be released safely
+                       aliasm = (char **) calloc(numaliasm, sizeof(char *));
+                       if (!aliasm) {
+                          numaliasm = 0;
+                          free(piece);
+                          return 1;
+                       }
+                       np++;
+                       break;
+                     }
+             default: break;
+          }
+          i++;
+       }
+       free(piece);
+   }
+   if (np != 2) {
+      drop_aliasm(aliasm, numaliasm);
+      fprintf(stderr,"error: missing AM alias information\n");
+      return 1;
+   }
+
+   /* now parse the numaliasm lines to read in the remainder of the table */
+   char * nl = line;
+   for (int j=0; j < numaliasm; j++) {
+        if (!fgets(nl,MAXDELEN,af)) {
+            // truncated table: do not leave a half-filled one behind
+            drop_aliasm(aliasm, numaliasm);
+            fprintf(stderr,"error: AM table is corrupt\n");
+            return 1;
+        }
+        mychomp(nl);
+        tp = nl;
+        i = 0;
+        while ((piece=mystrsep(&tp, 0))) {
+           if (*piece != '\0') {
+               switch(i) {
+                  case 0: {
+                             if (strncmp(piece,"AM",2) != 0) {
+                                 fprintf(stderr,"error: AM table is corrupt\n");
+                                 free(piece);
+                                 // also releases the descriptions stored so far
+                                 drop_aliasm(aliasm, numaliasm);
+                                 return 1;
+                             }
+                             break;
+                          }
+                  case 1: {
+                            if (complexprefixes) {
+                                if (utf8) reverseword_utf(piece);
+                                    else reverseword(piece);
+                            }
+                            aliasm[j] = mystrdup(piece);
+                            break; }
+                  default: break;
+               }
+               i++;
+           }
+           free(piece);
+        }
+        // a row without a description (or a failed duplicate) invalidates the table
+        if (!aliasm[j]) {
+             drop_aliasm(aliasm, numaliasm);
+             fprintf(stderr,"error: AM table is corrupt\n");
+             return 1;
+        }
+   }
+   return 0;
+}
+
+// 1 when a flag-vector alias table is present (aliasf set), otherwise 0
+int HashMgr::is_aliasf() {
+    return aliasf ? 1 : 0;
+}
+
+// 1 when a morphological alias table is present (aliasm set), otherwise 0
+int HashMgr::is_aliasm() {
+    return aliasm ? 1 : 0;
+}
+
+// Looks up flag-vector alias number `index` (1-based).
+//   fvec - receives the table's own vector (not a copy), or NULL when the
+//          index is out of range
+// Returns the length of the vector, 0 for a bad index (reported on stderr).
+int HashMgr::get_aliasf(int index, unsigned short ** fvec) {
+    if ((index > 0) && (index <= numaliasf)) {
+        *fvec = aliasf[index - 1];
+        return aliasflen[index - 1];
+    }
+    // one diagnostic only: the extra debug line that repeated it was dropped
+    fprintf(stderr,"error: bad flag alias index: %d\n", index);
+    *fvec = NULL;
+    return 0;
+}
+
+// Returns morphological alias number `index` (1-based, the table's own
+// string), or NULL after an stderr message when the index is out of range.
+char * HashMgr::get_aliasm(int index) {
+    if ((index < 1) || (index > numaliasm)) {
+        fprintf(stderr,"error: bad morph. alias index: %d\n", index);
+        return NULL;
+    }
+    return aliasm[index - 1];
+}
diff --git a/src/myspell/hashmgr.hxx b/src/myspell/hashmgr.hxx
index e8b08c3..3a27b1e 100644
--- a/src/myspell/hashmgr.hxx
+++ b/src/myspell/hashmgr.hxx
@@ -1,26 +1,50 @@
 #ifndef _HASHMGR_HXX_
 #define _HASHMGR_HXX_
 
+#include <cstdio>
 #include "htypes.hxx"
 
+enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
+
 class HashMgr
 {
   int             tablesize;
   struct hentry * tableptr;
+  int	          userword;
+  flag            flag_mode;
+  int             complexprefixes;
+  int             utf8;
+  int                 numaliasf; // flag vector `compression' with aliases
+  unsigned short **   aliasf;
+  unsigned short *    aliasflen;
+  int                 numaliasm; // morphological desciption `compression' with aliases
+  char **             aliasm;
+
 
 public:
-  HashMgr(const char * tpath);
+  HashMgr(const char * tpath, const char * apath);
   ~HashMgr();
 
   struct hentry * lookup(const char *) const;
   int hash(const char *) const;
   struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
 
+  int put_word(const char * word, int wl, char * ap);
+  int put_word_pattern(const char * word, int wl, const char * pattern);
+  int decode_flags(unsigned short ** result, char * flags);
+  unsigned short        decode_flag(const char * flag);
+  char *                encode_flag(unsigned short flag);
+  int is_aliasf();
+  int is_aliasm();
+  int get_aliasf(int index, unsigned short ** fvec);
+  char * get_aliasm(int index);
+  
 private:
-  HashMgr( const HashMgr & ); // not implemented
-  HashMgr &operator=( const HashMgr & ); // not implemented
   int load_tables(const char * tpath);
-  int add_word(const char * word, int wl, const char * ap, int al);
+  int add_word(const char * word, int wl, unsigned short * ap, int al, const char * desc);
+  int load_config(const char * affpath);
+  int parse_aliasf(char * line, FILE * af);
+  int parse_aliasm(char * line, FILE * af);
 
 };
 
diff --git a/src/myspell/htypes.hxx b/src/myspell/htypes.hxx
index 029e9f2..14a4783 100644
--- a/src/myspell/htypes.hxx
+++ b/src/myspell/htypes.hxx
@@ -1,20 +1,25 @@
 #ifndef _HTYPES_HXX_
 #define _HTYPES_HXX_
 
-#define MAXDELEN    256
+#define MAXDELEN    8192
 
 #define ROTATE_LEN   5
 
 #define ROTATE(v,q) \
    (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
 
+// approx. number  of user defined words
+#define USERWORD 1000
+
 struct hentry
 {
   short    wlen;
   short    alen;
   char *   word;
-  char *   astr;
+  unsigned short * astr;
   struct   hentry * next;
-}; 
+  struct   hentry * next_homonym;
+  char *   description;
+};
 
 #endif
diff --git a/src/myspell/hunspell.cxx b/src/myspell/hunspell.cxx
new file mode 100644
index 0000000..14ea1ad
--- /dev/null
+++ b/src/myspell/hunspell.cxx
@@ -0,0 +1,1616 @@
+#include "license.hunspell"
+#include "license.myspell"
+
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+
+#include "hunspell.hxx"
+
+#ifndef W32
+using namespace std;
+#endif
+
+// Build a spell checker from an affix file (affpath) and a dictionary
+// file (dpath). The hash manager is created first because the affix
+// manager is handed a pointer to it; the suggestion manager comes last
+// because it needs the affix manager and the TRY string.
+Hunspell::Hunspell(const char * affpath, const char * dpath)
+{
+    // start from a known-empty state
+    utf8 = 0;
+    complexprefixes = 0;
+    encoding = NULL;
+    csconv = NULL;
+    utfconv = NULL;
+
+    // 1. hash manager (receives both the dictionary and the affix path)
+    pHMgr = new HashMgr(dpath, affpath);
+
+    // 2. affix manager, which is given access to the hash manager
+    pAMgr = new AffixMgr(affpath, pHMgr);
+
+    // 3. cache the per-dictionary settings the affix manager exposes
+    char * tryChars = pAMgr->get_try_string();
+    encoding = pAMgr->get_encoding();
+    csconv = get_current_cs(encoding);
+    langnum = pAMgr->get_langnum();
+    utf8 = pAMgr->get_utf8();
+    utfconv = pAMgr->get_utf_conv();
+    complexprefixes = pAMgr->get_complexprefixes();
+    wordbreak = pAMgr->get_breaktable();
+
+    // 4. suggestion manager; the try string is released right after
+    //    the SuggestMgr constructor returns
+    pSMgr = new SuggestMgr(tryChars, MAXSUGGESTION, pAMgr);
+    free(tryChars);   // free(NULL) is a no-op, so no guard is needed
+
+    // state reported by the last check() call
+    forbidden_compound = 0;
+    prevcompound = 0;
+    prevroot = NULL;
+}
+
+// Release the three managers and the encoding string.
+Hunspell::~Hunspell()
+{
+    // deleting a null pointer is a no-op, so the managers need no guards
+    delete pSMgr;
+    delete pAMgr;
+    delete pHMgr;
+    pSMgr = NULL;
+    pAMgr = NULL;
+    pHMgr = NULL;
+
+    // csconv is only reset here, it is not freed by this class
+    csconv = NULL;
+
+    // encoding came from AffixMgr::get_encoding() and is freed here
+    free(encoding);
+    encoding = NULL;
+}
+
+
+// Copy src into dest with the leading blanks skipped and the trailing
+// periods dropped; the number of dropped periods is stored in *pabbrev.
+// While walking the characters the capitalization type is worked out
+// and stored in *pcaptype, and the character count goes to *nc.
+// For UTF-8 dictionaries dest_utf receives the UTF-16 form of the word.
+// Returns the byte length of the cleaned word written to dest
+// (0 when nothing is left or the word is too long).
+
+int Hunspell::cleanword2(char * dest, const char * src, 
+    w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
+{
+   const unsigned char * in = (const unsigned char *) src;
+   unsigned char * out = (unsigned char *) dest;
+   int firstcap = 0;
+
+   // skip the leading blanks (the NUL terminator is not a blank)
+   while (*in == ' ') in++;
+
+   // count the trailing periods and exclude them from the word
+   int remaining = strlen((const char *) in);
+   *pabbrev = 0;
+   for (; (remaining > 0) && (in[remaining - 1] == '.'); remaining--) {
+       (*pabbrev)++;
+   }
+
+   // an empty word cannot be capitalized
+   if (remaining <= 0) {
+       *pcaptype = NOCAP;
+       *out = '\0';
+       return 0;
+   }
+
+   // classify the remaining characters
+   int ncap = 0;
+   int nneutral = 0;
+   *nc = 0;
+
+   if (utf8) {
+      *nc = u8_u16(dest_utf, MAXWORDLEN, (const char *) in);
+      // don't check too long words
+      if (*nc >= MAXWORDLEN) return 0;
+      // the trailing periods were converted too: leave them out
+      *nc -= *pabbrev;
+      for (int k = 0; k < *nc; k++) {
+         unsigned short code = (dest_utf[k].h << 8) + dest_utf[k].l;
+         if (code != utfconv[code].clower) ncap++;
+         if (utfconv[code].cupper == utfconv[code].clower) nneutral++;
+      }
+      u16_u8(dest, MAXWORDUTF8LEN, dest_utf, *nc);
+      if (ncap) {
+         unsigned short first = (dest_utf[0].h << 8) + dest_utf[0].l;
+         firstcap = (first != utfconv[first].clower);
+      }
+   } else {
+      for (; remaining > 0; remaining--) {
+         const unsigned char ch = *in++;
+         (*nc)++;
+         if (csconv[ch].ccase) ncap++;
+         if (csconv[ch].cupper == csconv[ch].clower) nneutral++;
+         *out++ = ch;
+      }
+      // terminate the destination string
+      *out = '\0';
+      if (ncap) {
+        firstcap = csconv[(unsigned char)(*dest)].ccase;
+      }
+   }
+
+   // derive the capitalization type from the counters
+   if (ncap == 0) {
+        *pcaptype = NOCAP;
+   } else if ((ncap == 1) && firstcap) {
+        *pcaptype = INITCAP;
+   } else if ((ncap == *nc) || ((ncap + nneutral) == *nc)) {
+        *pcaptype = ALLCAP;
+   } else if ((ncap > 1) && firstcap) {
+        *pcaptype = HUHINITCAP;
+   } else {
+        *pcaptype = HUHCAP;
+   }
+   return strlen(dest);
+}
+
+// Variant of cleanword2() for callers that do not need the UTF-16 form
+// or the character count: copy src into dest without the leading blanks
+// and the trailing periods, store the number of removed periods in
+// *pabbrev and the capitalization type in *pcaptype.
+// Returns the byte length of the cleaned word written to dest
+// (0 when nothing is left or the word is too long).
+int Hunspell::cleanword(char * dest, const char * src, 
+    int * pcaptype, int * pabbrev)
+{ 
+   unsigned char * p = (unsigned char *) dest;
+   const unsigned char * q = (const unsigned char * ) src;
+   int firstcap = 0;
+
+   // first skip over any leading blanks
+   while ((*q != '\0') && (*q == ' ')) q++;
+   
+   // now strip off any trailing periods (recording their presence)
+   *pabbrev = 0;
+   int nl = strlen((const char *)q);
+   while ((nl > 0) && (*(q+nl-1)=='.')) {
+       nl--;
+       (*pabbrev)++;
+   }
+   
+   // if no characters are left it can't be capitalized
+   if (nl <= 0) { 
+       *pcaptype = NOCAP;
+       *p = '\0';
+       return 0;
+   }
+
+   // now determine the capitalization type of the first nl letters
+   int ncap = 0;
+   int nneutral = 0;
+   int nc = 0;
+
+   if (!utf8) {
+      while (nl > 0) {
+         nc++;
+         if (csconv[(*q)].ccase) ncap++;
+         if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
+         *p++ = *q++;
+         nl--;
+      }
+      // remember to terminate the destination string
+      *p = '\0';
+      firstcap = csconv[(unsigned char)(*dest)].ccase;
+   } else {
+      unsigned short idx;
+      w_char t[MAXWORDLEN];
+      // convert the blank-stripped word (q), not the raw src, so the
+      // UTF-8 path cleans the word the same way as the 8-bit path
+      nc = u8_u16(t, MAXWORDLEN, (const char *) q);
+      // don't check too long words (same limit as cleanword2)
+      if (nc >= MAXWORDLEN) {
+         *pcaptype = NOCAP;
+         *p = '\0';
+         return 0;
+      }
+      // the trailing periods were converted too: leave them out
+      nc -= *pabbrev;
+      for (int i = 0; i < nc; i++) {
+         idx = (t[i].h << 8) + t[i].l;
+         if (idx != utfconv[idx].clower) ncap++;
+         if (utfconv[idx].cupper == utfconv[idx].clower) nneutral++;
+      }
+      u16_u8(dest, MAXWORDUTF8LEN, t, nc);
+      if (ncap) {
+         idx = (t[0].h << 8) + t[0].l;
+         firstcap = (idx != utfconv[idx].clower);
+      }
+   }
+
+   // now finally set the captype
+   if (ncap == 0) {
+        *pcaptype = NOCAP;
+   } else if ((ncap == 1) && firstcap) {
+        *pcaptype = INITCAP;
+   } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
+        *pcaptype = ALLCAP;
+   } else if ((ncap > 1) && firstcap) {
+        *pcaptype = HUHINITCAP;
+   } else {
+        *pcaptype = HUHCAP;
+   }
+   return strlen(dest);
+} 
+       
+
+// Upper-case the whole word p in place, using the UTF-16 case table for
+// UTF-8 dictionaries and the 8-bit table otherwise.
+void Hunspell::mkallcap(char * p)
+{
+  if (!utf8) {
+    // 8-bit encoding: map byte by byte
+    for (; *p != '\0'; p++) {
+        *p = csconv[((unsigned char) *p)].cupper;
+    }
+    return;
+  }
+
+  // UTF-8: convert to UTF-16, map every character, convert back
+  w_char wide[MAXWORDLEN];
+  int count = u8_u16(wide, MAXWORDLEN, p);
+  for (int k = 0; k < count; k++) {
+     unsigned short code = (wide[k].h << 8) + wide[k].l;
+     if (code != utfconv[code].cupper) {
+        wide[k].h = (unsigned char) (utfconv[code].cupper >> 8);
+        wide[k].l = (unsigned char) (utfconv[code].cupper & 0x00FF);
+     }
+  }
+  u16_u8(p, MAXWORDUTF8LEN, wide, count);
+}
+
+// Upper-case a word that is available both as bytes (p) and as nc
+// UTF-16 characters (u). For UTF-8 dictionaries u is mapped and p is
+// regenerated from it, and the new byte length of p is returned;
+// otherwise p is mapped byte by byte and nc is returned unchanged.
+int Hunspell::mkallcap2(char * p, w_char * u, int nc)
+{
+  if (!utf8) {
+    for (; *p != '\0'; p++) {
+        *p = csconv[((unsigned char) *p)].cupper;
+    }
+    return nc;
+  }
+
+  for (int k = 0; k < nc; k++) {
+     unsigned short code = (u[k].h << 8) + u[k].l;
+     if (code != utfconv[code].cupper) {
+        u[k].h = (unsigned char) (utfconv[code].cupper >> 8);
+        u[k].l = (unsigned char) (utfconv[code].cupper & 0x00FF);
+     }
+  }
+  u16_u8(p, MAXWORDUTF8LEN, u, nc);
+  return strlen(p);
+}
+
+
+// Lower-case the word p in place with the 8-bit case table
+// (this variant has no UTF-8 branch).
+void Hunspell::mkallsmall(char * p)
+{
+    for (; *p != '\0'; p++) {
+        *p = csconv[((unsigned char) *p)].clower;
+    }
+}
+
+// Lower-case a word that is available both as bytes (p) and as nc
+// UTF-16 characters (u). For UTF-8 dictionaries u is mapped and p is
+// regenerated from it, and the new byte length of p is returned;
+// otherwise p is mapped byte by byte and nc is returned unchanged.
+int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
+{
+  if (!utf8) {
+    for (; *p != '\0'; p++) {
+        *p = csconv[((unsigned char) *p)].clower;
+    }
+    return nc;
+  }
+
+  for (int k = 0; k < nc; k++) {
+     unsigned short code = (u[k].h << 8) + u[k].l;
+     if (code != utfconv[code].clower) {
+        u[k].h = (unsigned char) (utfconv[code].clower >> 8);
+        u[k].l = (unsigned char) (utfconv[code].clower & 0x00FF);
+     }
+  }
+  u16_u8(p, MAXWORDUTF8LEN, u, nc);
+  return strlen(p);
+}
+
+// convert UTF-8 sharp S codes to latin 1
+// Copies source (including its terminator) to dest; every UTF-8 sharp s
+// (bytes 0xC3 0x9F) is collapsed into the single Latin-1 byte 0xDF.
+// The byte values are written as hex escapes so that the function does
+// not depend on the encoding of this source file.
+// Returns dest.
+char * Hunspell::sharps_u8_l1(char * dest, char * source) {
+    char * p = dest;
+    *p = *source;
+    // the loop stops after the terminating NUL has been copied
+    for (p++, source++; *(source - 1); p++, source++) {
+        *p = *source;
+        // second byte of a UTF-8 sharp s: step back over the 0xC3 that
+        // was copied in the previous round and overwrite it with 0xDF
+        if (*source == '\x9F') *--p = '\xDF';
+    }
+    return dest;
+}
+
+// recursive search for the right "ss" / sharp s permutation
+// Starting at pos, every "ss" in base is tried both as a UTF-8 sharp s
+// (bytes 0xC3 0x9F) and as plain "ss"; n counts the "ss" pairs handled
+// so far (bounded by MAXSHARPS) and repnum the replacements made.
+// When no further "ss" is found and at least one replacement was made,
+// the candidate is looked up with check() (converted to Latin-1 through
+// tmp for non-UTF-8 dictionaries). Returns the entry found, or NULL.
+hentry * Hunspell::spellsharps(char * base, char * pos, int n, int repnum, char * tmp) {
+    pos = strstr(pos, "ss");
+    if (pos && (n < MAXSHARPS)) {
+        // first try this pair as a sharp s ...
+        *pos = '\xC3';
+        *(pos + 1) = '\x9F';
+        hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp);
+        if (h) return h;
+        // ... then restore it and try the rest with plain "ss" here
+        *pos = 's';
+        *(pos + 1) = 's';
+        h = spellsharps(base, pos + 2, n + 1, repnum, tmp);
+        if (h) return h;
+    } else if (repnum > 0) {
+        if (utf8) return check(base);
+        return check(sharps_u8_l1(tmp, base));
+    }
+    return NULL;
+}
+
+// Returns 1 when an affix manager exists, it defines a keepcase flag and
+// the entry rv carries that flag in its flag vector; 0 otherwise.
+int Hunspell::is_keepcase(const hentry * rv) {
+    if (!pAMgr) return 0;
+    if (!rv->astr) return 0;
+    if (!pAMgr->get_keepcase()) return 0;
+    return TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen) ? 1 : 0;
+}
+
+/* if word is spelled correctly, put a copy of it at the front of the
+   suggestion array; *ns is the number of suggestions and is updated.
+   Always returns 0. */
+int Hunspell::insert_sug(char ***slst, char * word, int *ns) {
+    if (!spell(word)) return 0;
+
+    char ** list = *slst;
+    int count = *ns;
+
+    // the array is full: drop the last suggestion to make room
+    if (count == MAXSUGGESTION) {
+        count--;
+        free(list[count]);
+    }
+
+    // shift the existing suggestions one slot towards the end
+    for (int k = count; k > 0; k--) list[k] = list[k - 1];
+
+    list[0] = mystrdup(word);
+    *ns = count + 1;
+    return 0;
+}
+
+// Spell check a single word.
+// The word is cleaned with cleanword2() (leading blanks and trailing
+// periods removed), plain numbers are accepted, and the cleaned word is
+// then looked up in the case variants that fit its capitalization type.
+// If that fails the word is split at the break points of the affix file
+// and, for Hungarian, at dashes.
+// Returns 1 when the word is accepted and 0 otherwise (over-long words
+// are rejected, empty words are accepted).
+// Non-ASCII bytes are written as hex escapes so that the function does
+// not depend on the encoding of this source file.
+int Hunspell::spell(const char * word)
+{
+  struct hentry * rv=NULL;
+  // need larger vector. For example, Turkish capital letter I converted a
+  // 2-byte UTF-8 character (dotless i) by mkallsmall.
+  char cw[MAXWORDUTF8LEN + 4];
+  char wspace[MAXWORDUTF8LEN + 4];
+  w_char unicw[MAXWORDLEN + 1];
+  int nc = strlen(word);
+  // always assigned by mkinitcap2() before it is read; initialised so
+  // that this does not depend on the control flow staying that way
+  int wl2 = 0;
+  if (utf8) {
+    if (nc >= MAXWORDUTF8LEN) return 0;
+  } else {
+    if (nc >= MAXWORDLEN) return 0;
+  }
+  int captype = 0;
+  int abbv = 0;
+  int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+
+  if (wl == 0) return 1;
+
+  // allow numbers with dots and commas (but forbid double separators: "..", ",," etc.)
+  enum { NBEGIN, NNUM, NSEP };
+  int nstate = NBEGIN;
+  int i;
+
+  // the assignments inside the condition are intentional: they advance
+  // the small state machine while the word still looks like a number
+  for (i = 0; (i < wl) &&
+      (((cw[i] <= '9') && (cw[i] >= '0') && (nstate = NNUM)) ||
+          ((nstate == NNUM) && ((cw[i] == ',') ||
+              (cw[i] == '.') || (cw[i] == '-')) && (nstate = NSEP))); i++);
+  if ((i == wl) && (nstate == NNUM)) return 1;
+
+  // LANG_hu section: number(s) + (percent or degree) with suffixes
+  // ('\xB0' is the degree sign of the 8-bit encoding)
+  if (langnum == LANG_hu) {
+    if ((nstate == NNUM) && ((cw[i] == '%') || (cw[i] == '\xB0')) && check(cw + i)) return 1;
+  }
+  // END of LANG_hu section
+
+  switch(captype) {
+     case HUHCAP:
+     case HUHINITCAP:
+     case NOCAP: {
+                    rv = check(cw);
+                    if ((abbv) && !(rv)) {
+                        memcpy(wspace,cw,wl);
+                        *(wspace+wl) = '.';
+                        *(wspace+wl+1) = '\0';
+                        rv = check(wspace);
+                    }
+                    break;
+                 }
+     case ALLCAP: {
+                    rv = check(cw);
+                    if (rv) break;
+                    if (abbv) {
+                        memcpy(wspace,cw,wl);
+                        *(wspace+wl) = '.';
+                        *(wspace+wl+1) = '\0';
+                        rv = check(wspace);
+                        if (rv) break;
+                    }
+                    if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
+                        char tmpword[MAXWORDUTF8LEN];
+                        wl = mkallsmall2(cw, unicw, nc);
+                        memcpy(wspace,cw,(wl+1));
+                        rv = spellsharps(wspace, wspace, 0, 0, tmpword);
+                        if (!rv) {
+                            wl2 = mkinitcap2(cw, unicw, nc);
+                            rv = spellsharps(cw, cw, 0, 0, tmpword);
+                        }
+                        if ((abbv) && !(rv)) {
+                            *(wspace+wl) = '.';
+                            *(wspace+wl+1) = '\0';
+                            rv = spellsharps(wspace, wspace, 0, 0, tmpword);
+                            if (!rv) {
+                                memcpy(wspace, cw, wl2);
+                                *(wspace+wl2) = '.';
+                                *(wspace+wl2+1) = '\0';
+                                rv = spellsharps(wspace, wspace, 0, 0, tmpword);
+                            }
+                        }
+                        if (rv) break;
+                    }
+                }
+                // intentional fall through: an ALLCAP word is also
+                // tried in its lower case and initial capital forms
+     case INITCAP: {
+                     wl = mkallsmall2(cw, unicw, nc);
+                     memcpy(wspace,cw,(wl+1));
+                     rv = check(wspace);
+                     if (!rv || (is_keepcase(rv) && !((captype == INITCAP) &&
+                           // if CHECKSHARPS: KEEPCASE words with sharp s are
+                           // allowed in INITCAP form, too.
+                           pAMgr->get_checksharps() && ((utf8 && strstr(wspace, "\xC3\x9F")) ||
+                            (!utf8 && strchr(wspace, '\xDF')))))) {
+                        wl2 = mkinitcap2(cw, unicw, nc);
+                        rv = check(cw);
+                        if (rv && (captype == ALLCAP) && is_keepcase(rv)) rv = NULL;
+                     }
+                     if (abbv && !rv) {
+                         *(wspace+wl) = '.';
+                         *(wspace+wl+1) = '\0';
+                         rv = check(wspace);
+                         if (!rv || is_keepcase(rv)) {
+                            memcpy(wspace, cw, wl2);
+                            *(wspace+wl2) = '.';
+                            *(wspace+wl2+1) = '\0';
+                            rv = check(wspace);
+                            if (rv && ((captype == ALLCAP) && is_keepcase(rv))) rv = NULL;
+                         }
+                     }
+                     break;
+                   }
+  }
+
+  if (rv) return 1;
+
+  // recursive breaking at break points (not good for morphological analysis)
+  if (wordbreak) {
+    char * s;
+    char r;
+    for (int b = 0; b < pAMgr->get_numbreak(); b++) {
+      s = (char *) strstr(cw, wordbreak[b]);
+      if (s) {
+        r = *s;
+        *s = '\0';
+        // examine 2 sides of the break point
+        if (spell(cw) && spell(s + strlen(wordbreak[b]))) {
+            *s = r;
+            return 1;
+        }
+        *s = r;
+      }
+    }
+  }
+
+  // LANG_hu: compoundings with dashes and n-dashes XXX deprecated!
+  if (langnum == LANG_hu) {
+    int n;
+    // compound word with dash (HU) I18n
+    char * dash;
+    int result = 0;
+    // n-dash (UTF-8 bytes 0xE2 0x80 0x93)
+    if (!wordbreak && (dash=(char *) strstr(cw,"\xE2\x80\x93"))) {
+        *dash = '\0';
+        // examine 2 sides of the dash
+        if (spell(cw) && spell(dash + 3)) {
+            *dash = '\xE2';
+            return 1;
+        }
+        // put back the first byte of the n-dash
+        *dash = '\xE2';
+    }
+    if ((dash=(char *) strchr(cw,'-'))) {
+        *dash='\0';
+        // examine 2 sides of the dash
+        if (dash[1] == '\0') { // base word ending with dash
+            if (spell(cw)) return 1;
+        } else {
+            // first word ending with dash: word-
+            char r2 = *(dash + 1);
+            dash[0]='-';
+            dash[1]='\0';
+            result = spell(cw);
+            dash[1] = r2;
+            dash[0]='\0';
+            if (result && spell(dash+1) && ((strlen(dash+1) > 1) || (dash[1] == 'e') ||
+                ((dash[1] > '0') && (dash[1] < '9')))) return 1;
+        }
+        // affixed number in correct word
+        if (result && (dash > cw) && (((*(dash-1)<='9') && (*(dash-1)>='0')) || (*(dash-1)>='.'))) {
+            *dash='-';
+            n = 1;
+            if (*(dash - n) == '.') n++;
+            // search first not a number character to left from dash
+            while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
+                n++;
+            }
+            if ((dash - n) < cw) n--;
+            // numbers: deprecated
+            for(; n >= 1; n--) {
+                if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && check(dash - n)) return 1;
+            }
+        }
+    }
+  }
+  return 0;
+}
+
+// Look up one already cleaned word form.
+// The word is searched in the hash table first; if it is not found (or
+// only pseudoroot / onlyincompound homonyms exist) affix stripping and
+// then compound checking are tried through the affix manager.
+// Returns the dictionary entry that accepts the word, or NULL when the
+// word is unknown or forbidden.
+// Side effects: resets and then sets forbidden_compound, prevcompound
+// and prevroot (see the "not threadsafe" notes below).
+struct hentry * Hunspell::check(const char * w)
+{
+  struct hentry * he = NULL;
+  int len;
+  char w2[MAXWORDUTF8LEN];
+  const char * word = w;
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    strcpy(w2, w);
+    if (utf8) reverseword_utf(w2); else reverseword(w2);
+    word = w2;
+  }
+
+  forbidden_compound = 0; // XXX LANG_hu class variable for suggestions (not threadsafe)
+  prevcompound = 0;       // compounding information for Hunspell's pipe interface (not threadsafe)
+  prevroot = NULL;        // root information for Hunspell's pipe interface (not threadsafe)
+
+  // look word in hash table
+  if (pHMgr) he = pHMgr->lookup(word);
+
+  // check forbidden and onlyincompound words
+  if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
+    // LANG_hu section: set dash information for suggestions
+    if (langnum == LANG_hu) {
+        forbidden_compound = 1;
+        if (pAMgr->get_compoundflag() &&
+            TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
+                forbidden_compound = 2;
+        }
+    }
+    return NULL;
+  }
+
+  // he = next not pseudoroot and not onlyincompound homonym or NULL
+  // (pAMgr is tested first, like everywhere else in this function, so
+  // a missing affix manager cannot be dereferenced here)
+  while (he && (he->astr) && pAMgr &&
+    ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) ||
+       (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen))
+    )) he = he->next_homonym;
+
+  // check with affixes
+  if (!he && pAMgr) {
+     // try stripping off affixes
+     len = strlen(word);
+     he = pAMgr->affix_check(word, len, 0);
+
+     // check compound restriction
+     if (he && he->astr && pAMgr->get_onlyincompound() &&
+         TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) he = NULL;
+
+     // try check compound word
+     if (he) {
+        if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
+            forbidden_compound = 1; // LANG_hu
+            return NULL;
+        }
+        prevroot = he->word;
+     } else if (pAMgr->get_compound()) {
+          he = pAMgr->compound_check(word, len,
+                                  0,0,100,0,NULL,0,NULL,NULL,0);
+          // LANG_hu section: `moving rule' with last dash
+          if ((!he) && (langnum == LANG_hu) && (word[len-1]=='-')) {
+             char * dup = mystrdup(word);
+             dup[len-1] = '\0';
+             he = pAMgr->compound_check(dup, len-1,
+                                  -5,0,100,0,NULL,1,NULL,NULL,0);
+             free(dup);
+          }
+          // end of LANG specific region
+          if (he) {
+                prevroot = he->word;
+                prevcompound = 1;
+          }
+     }
+
+  }
+
+  return he;
+}
+
+// Build the suggestion list for a (misspelled) word.
+// The word is cleaned with cleanword2() and the suggestion manager is
+// asked for suggestions for the case variants that fit the word's
+// capitalization type; n-gram suggestions are tried when nothing was
+// found. The results are then re-capitalized, de-duplicated, optionally
+// extended with the removed trailing dots and filtered for KEEPCASE.
+// *slst receives the array of suggestion strings; the return value is
+// the number of suggestions.
+// Non-ASCII bytes are written as hex escapes so that the function does
+// not depend on the encoding of this source file.
+int Hunspell::suggest(char*** slst, const char * word)
+{
+  char cw[MAXWORDUTF8LEN + 4];
+  char wspace[MAXWORDUTF8LEN + 4];
+  if (! pSMgr) return 0;
+  w_char unicw[MAXWORDLEN + 1];
+  int nc = strlen(word);
+  if (utf8) {
+    if (nc >= MAXWORDUTF8LEN) return 0;
+  } else {
+    if (nc >= MAXWORDLEN) return 0;
+  }
+  int captype = 0;
+  int abbv = 0;
+  int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
+  if (wl == 0) return 0;
+  int ns = 0;
+  *slst = NULL;
+  int capwords = 0;
+
+  switch(captype) {
+     case NOCAP:   {
+                     ns = pSMgr->suggest(slst, cw, ns);
+                     break;
+                   }
+
+     case INITCAP: {
+                     capwords = 1;
+                     ns = pSMgr->suggest(slst, cw, ns);
+                     if (ns == -1) break;
+                     memcpy(wspace,cw,(wl+1));
+                     mkallsmall2(wspace, unicw, nc);
+                     ns = pSMgr->suggest(slst, wspace, ns);
+                     break;
+                   }
+     case HUHINITCAP:
+     case HUHCAP: {
+                     ns = pSMgr->suggest(slst, cw, ns);
+                     if (ns != -1) {
+                        memcpy(wspace,cw,(wl+1));
+                        mkallsmall2(wspace, unicw, nc);
+                        insert_sug(slst, wspace, &ns);
+                        ns = pSMgr->suggest(slst, wspace, ns);
+                        if (captype == HUHINITCAP) {
+                            mkinitcap2(wspace, unicw, nc);
+                            insert_sug(slst, wspace, &ns);
+                            ns = pSMgr->suggest(slst, wspace, ns);
+                        }
+                     }
+                     break;
+                   }
+
+     case ALLCAP: {
+                     memcpy(wspace, cw, (wl+1));
+                     mkallsmall2(wspace, unicw, nc);
+                     ns = pSMgr->suggest(slst, wspace, ns);
+                     if (ns == -1) break;
+                     if (pAMgr && pAMgr->get_keepcase()) insert_sug(slst, wspace, &ns);
+                     mkinitcap2(wspace, unicw, nc);
+                     ns = pSMgr->suggest(slst, wspace, ns);
+                     for (int j=0; j < ns; j++) {
+                        mkallcap((*slst)[j]);
+                        if (pAMgr && pAMgr->get_checksharps()) {
+                            char * pos;
+                            if (utf8) {
+                                // UTF-8 sharp s is two bytes, so "SS"
+                                // fits in place
+                                while ((pos = strstr((*slst)[j], "\xC3\x9F"))) {
+                                    *pos = 'S';
+                                    *(pos+1) = 'S';
+                                }
+                            } else {
+                                // 8-bit sharp s is one byte: grow the
+                                // string by one before replacing
+                                while ((pos = strchr((*slst)[j], '\xDF'))) {
+                                    (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
+                                    mystrrep((*slst)[j], "\xDF", "SS");
+                                }
+                            }
+                        }
+                     }
+                     break;
+                   }
+  }
+
+  // LANG_hu section: replace '-' with ' ' in Hungarian
+  if ((langnum == LANG_hu) && (forbidden_compound == 2)) {
+      for (int j=0; j < ns; j++) {
+          char * pos = strchr((*slst)[j],'-');
+          if (pos) *pos = ' ';
+      }
+  }
+  // END OF LANG_hu section
+
+  // try ngram approach since found nothing
+  if ((ns == 0) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
+      switch(captype) {
+          case NOCAP: {
+              ns = pSMgr->ngsuggest(*slst, cw, pHMgr);
+              break;
+          }
+          case HUHCAP: {
+              memcpy(wspace,cw,(wl+1));
+              mkallsmall2(wspace, unicw, nc);
+              ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
+              break;
+          }
+          case INITCAP: {
+              capwords = 1;
+              memcpy(wspace,cw,(wl+1));
+              mkallsmall2(wspace, unicw, nc);
+              ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
+              break;
+          }
+          case ALLCAP: {
+              memcpy(wspace,cw,(wl+1));
+              mkallsmall2(wspace, unicw, nc);
+              ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
+              for (int j=0; j < ns; j++)
+                  mkallcap((*slst)[j]);
+              break;
+         }
+      }
+  }
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    for (int j = 0; j < ns; j++) {
+      if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
+    }
+  }
+
+  // capitalize and erase capitalized duplications
+  if (capwords) {
+    int l = 0;
+    for (int j=0; j < ns; j++) {
+      mkinitcap((*slst)[j]);
+      (*slst)[l] = (*slst)[j];
+      for (int k=0; k < l; k++) {
+        if (strcmp((*slst)[k], (*slst)[j]) == 0) {
+          free((*slst)[j]);
+          l--;
+          // stop here: (*slst)[j] has just been freed and must not be
+          // compared again
+          break;
+        }
+      }
+      l++;
+    }
+    ns = l;
+  }
+
+  // expand suggestions with dot(s)
+  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
+    for (int j = 0; j < ns; j++) {
+      (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
+      strcat((*slst)[j], word + strlen(word) - abbv);
+    }
+  }
+
+  // suggest keepcase
+  if (pAMgr && pAMgr->get_keepcase()) {
+  switch (captype) {
+    case INITCAP:
+    case ALLCAP: {
+      int l = 0;
+      for (int j=0; j < ns; j++) {
+        if (!spell((*slst)[j])) {
+          // the re-capitalized suggestion is not a word: try its lower
+          // case form, then its initial capital form
+          char s[MAXSWUTF8L];
+          w_char w[MAXSWL];
+          int len;
+          if (utf8) {
+            len = u8_u16(w, MAXSWL, (*slst)[j]);
+          } else {
+            strcpy(s, (*slst)[j]);
+            len = strlen(s);
+          }
+          mkallsmall2(s, w, len);
+          free((*slst)[j]);
+          if (spell(s)) {
+            (*slst)[l] = mystrdup(s);
+            l++;
+          } else {
+            mkinitcap2(s, w, len);
+            if (spell(s)) {
+              (*slst)[l] = mystrdup(s);
+              l++;
+            }
+          }
+        } else {
+          (*slst)[l] = (*slst)[j];
+          l++;
+        }
+      }
+      ns = l;
+      l = 0;
+      // remove duplications
+      for (int j=0; j < ns; j++) {
+        (*slst)[l] = (*slst)[j];
+        for (int k=0; k < l; k++) {
+          if (strcmp((*slst)[k], (*slst)[j]) == 0) {
+            free((*slst)[j]);
+            l--;
+            // stop here: (*slst)[j] has just been freed
+            break;
+          }
+        }
+        l++;
+      }
+      ns = l;
+    }
+  }
+  }
+
+  return ns;
+}
+
+// XXX need UTF-8 support
+// Variant of suggest() built on SuggestMgr::suggest_auto(): the word is
+// cleaned with cleanword() and the case variants that fit its
+// capitalization type are passed to the suggestion manager.
+// *slst receives the suggestion array; returns the suggestion count.
+int Hunspell::suggest_auto(char*** slst, const char * word)
+{
+  char cw[MAXWORDUTF8LEN + 4];
+  char wspace[MAXWORDUTF8LEN + 4];
+  if (! pSMgr) return 0;
+
+  // reject over-long input; the limit depends on the encoding
+  int wl = strlen(word);
+  const int limit = utf8 ? MAXWORDUTF8LEN : MAXWORDLEN;
+  if (wl >= limit) return 0;
+
+  int captype = 0;
+  int abbv = 0;
+  wl = cleanword(cw, word, &captype, &abbv);
+  if (wl == 0) return 0;
+
+  int ns = 0;
+  *slst = NULL; // HU, nsug in pSMgr->suggest
+
+  if (captype == NOCAP) {
+      ns = pSMgr->suggest_auto(slst, cw, ns);
+  } else if (captype == INITCAP) {
+      // lower case form first, its results re-capitalized ...
+      memcpy(wspace, cw, (wl+1));
+      mkallsmall(wspace);
+      ns = pSMgr->suggest_auto(slst, wspace, ns);
+      for (int k = 0; k < ns; k++) {
+          mkinitcap((*slst)[k]);
+      }
+      // ... then the word as it was written
+      ns = pSMgr->suggest_auto(slst, cw, ns);
+  } else if (captype == HUHCAP) {
+      ns = pSMgr->suggest_auto(slst, cw, ns);
+      if (ns == 0) {
+          memcpy(wspace, cw, (wl+1));
+          mkallsmall(wspace);
+          ns = pSMgr->suggest_auto(slst, wspace, ns);
+      }
+  } else if (captype == ALLCAP) {
+      memcpy(wspace, cw, (wl+1));
+      mkallsmall(wspace);
+      ns = pSMgr->suggest_auto(slst, wspace, ns);
+
+      mkinitcap(wspace);
+      ns = pSMgr->suggest_auto(slst, wspace, ns);
+
+      // the input was all capitals, so are the suggestions
+      for (int k = 0; k < ns; k++) {
+          mkallcap((*slst)[k]);
+      }
+  }
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    for (int k = 0; k < ns; k++) {
+      if (utf8) {
+        reverseword_utf((*slst)[k]);
+      } else {
+        reverseword((*slst)[k]);
+      }
+    }
+  }
+
+  // expand suggestions with dot(s)
+  if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
+    const char * dots = word + strlen(word) - abbv;
+    for (int k = 0; k < ns; k++) {
+      (*slst)[k] = (char *) realloc((*slst)[k], strlen((*slst)[k]) + 1 + abbv);
+      strcat((*slst)[k], dots);
+    }
+  }
+
+  // replace '-' with ' '
+  if (forbidden_compound == 2) {
+    for (int k = 0; k < ns; k++) {
+      char * hyphen = strchr((*slst)[k], '-');
+      if (hyphen) *hyphen = ' ';
+    }
+  }
+  return ns;
+}
+
+// XXX need UTF-8 support
+// Collect stems for a word through SuggestMgr::suggest_stems(): the
+// word is cleaned with cleanword() and the case variants that fit its
+// capitalization type are tried until one of them yields a result;
+// for abbreviations the form with one trailing dot is tried last.
+// *slst receives the result array; returns the number of results.
+int Hunspell::stem(char*** slst, const char * word)
+{
+  char cw[MAXWORDUTF8LEN + 4];
+  char wspace[MAXWORDUTF8LEN + 4];
+  if (! pSMgr) return 0;
+
+  // reject over-long input; the limit depends on the encoding
+  int wl = strlen(word);
+  const int limit = utf8 ? MAXWORDUTF8LEN : MAXWORDLEN;
+  if (wl >= limit) return 0;
+
+  int captype = 0;
+  int abbv = 0;
+  wl = cleanword(cw, word, &captype, &abbv);
+  if (wl == 0) return 0;
+
+  int ns = 0;
+  *slst = NULL; // HU, nsug in pSMgr->suggest
+
+  if ((captype == HUHCAP) || (captype == NOCAP)) {
+      ns = pSMgr->suggest_stems(slst, cw, ns);
+
+      if (abbv && (ns == 0)) {
+          memcpy(wspace, cw, wl);
+          wspace[wl] = '.';
+          wspace[wl + 1] = '\0';
+          ns = pSMgr->suggest_stems(slst, wspace, ns);
+      }
+  } else if (captype == INITCAP) {
+      ns = pSMgr->suggest_stems(slst, cw, ns);
+
+      if (ns == 0) {
+          memcpy(wspace, cw, (wl+1));
+          mkallsmall(wspace);
+          ns = pSMgr->suggest_stems(slst, wspace, ns);
+      }
+
+      if (abbv && (ns == 0)) {
+          memcpy(wspace, cw, wl);
+          mkallsmall(wspace);
+          wspace[wl] = '.';
+          wspace[wl + 1] = '\0';
+          ns = pSMgr->suggest_stems(slst, wspace, ns);
+      }
+  } else if (captype == ALLCAP) {
+      ns = pSMgr->suggest_stems(slst, cw, ns);
+
+      // only when the word as written gave nothing
+      if (ns == 0) {
+          memcpy(wspace, cw, (wl+1));
+          mkallsmall(wspace);
+          ns = pSMgr->suggest_stems(slst, wspace, ns);
+
+          if (ns == 0) {
+              mkinitcap(wspace);
+              ns = pSMgr->suggest_stems(slst, wspace, ns);
+          }
+
+          if (abbv && (ns == 0)) {
+              memcpy(wspace, cw, wl);
+              mkallsmall(wspace);
+              wspace[wl] = '.';
+              wspace[wl + 1] = '\0';
+              ns = pSMgr->suggest_stems(slst, wspace, ns);
+          }
+      }
+  }
+
+  return ns;
+}
+
+// Collect results from SuggestMgr::suggest_pos_stems() for a word: the
+// word is cleaned with cleanword() and the case variants that fit its
+// capitalization type are tried in turn.
+// *slst receives the result array; returns the number of results.
+int Hunspell::suggest_pos_stems(char*** slst, const char * word)
+{
+  char cw[MAXWORDUTF8LEN + 4];
+  char wspace[MAXWORDUTF8LEN + 4];
+  if (! pSMgr) return 0;
+
+  // reject over-long input; the limit depends on the encoding
+  int wl = strlen(word);
+  const int limit = utf8 ? MAXWORDUTF8LEN : MAXWORDLEN;
+  if (wl >= limit) return 0;
+
+  int captype = 0;
+  int abbv = 0;
+  wl = cleanword(cw, word, &captype, &abbv);
+  if (wl == 0) return 0;
+
+  int ns = 0; // ns=0 = normalized input
+  *slst = NULL; // HU, nsug in pSMgr->suggest
+
+  if ((captype == HUHCAP) || (captype == NOCAP)) {
+      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
+
+      if (abbv && (ns == 0)) {
+          memcpy(wspace, cw, wl);
+          wspace[wl] = '.';
+          wspace[wl + 1] = '\0';
+          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
+      }
+  } else if (captype == INITCAP) {
+      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
+
+      // retry in lower case when nothing was found or the first
+      // result starts with '#'
+      if (ns == 0 || ((*slst)[0][0] == '#')) {
+          memcpy(wspace, cw, (wl+1));
+          mkallsmall(wspace);
+          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
+      }
+  } else if (captype == ALLCAP) {
+      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
+
+      // only when the word as written gave nothing
+      if (ns == 0) {
+          memcpy(wspace, cw, (wl+1));
+          mkallsmall(wspace);
+          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
+
+          if (ns == 0) {
+              mkinitcap(wspace);
+              ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
+          }
+      }
+  }
+
+  return ns;
+}
+
+// Accessor for the `encoding` member; the pointer is returned as stored
+// (no copy is made here).
+char * Hunspell::get_dic_encoding()
+{
+  return this->encoding;
+}
+
+// Forwards to the affix manager's get_wordchars() and returns its result.
+const char * Hunspell::get_wordchars()
+{
+  const char * chars = pAMgr->get_wordchars();
+  return chars;
+}
+
+// Forwards to the affix manager's get_wordchars_utf16(); `len` is passed
+// through unchanged for the affix manager to fill in.
+unsigned short * Hunspell::get_wordchars_utf16(int * len)
+{
+  unsigned short * chars = pAMgr->get_wordchars_utf16(len);
+  return chars;
+}
+
+// Accessor for the `prevroot` member.
+// XXX not stateless, not for OOo
+char * Hunspell::get_prevroot()
+{
+  return this->prevroot;
+}
+
+// Accessor for the `prevcompound` member.
+// XXX not stateless, not for OOo
+int Hunspell::get_prevcompound()
+{
+  return this->prevcompound;
+}
+
+// Accessor for the `forbidden_compound` member.
+// XXX not stateless, not for OOo
+int Hunspell::get_forbidden_compound()
+{
+  return this->forbidden_compound;
+}
+
+/* Upper-case the first character of the NUL-terminated string p, in place.
+ *
+ * 8-bit encodings: the first byte is replaced by its csconv[] upper-case
+ * counterpart (an empty string is left alone).
+ * UTF-8: p is decoded with u8_u16(), the first character is mapped through
+ * utfconv[], and the whole string is written back to p with u16_u8()
+ * using MAXWORDUTF8LEN as the size limit.
+ */
+void Hunspell::mkinitcap(char * p)
+{
+  if (!utf8) {
+    if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
+  } else {
+      w_char u[MAXWORDLEN];
+      int len = u8_u16(u, MAXWORDLEN, p);
+      // Nothing was decoded (empty input or a failed conversion): u[0] is
+      // uninitialized, so there is no first character to upper-case.
+      // mkinitcap2() applies the same guard through its nc > 0 test.
+      if (len <= 0) return;
+      unsigned short i = utfconv[(u[0].h << 8) + u[0].l].cupper;
+      u[0].h = (unsigned char) (i >> 8);
+      u[0].l = (unsigned char) (i & 0x00FF);
+      u16_u8(p, MAXWORDUTF8LEN, u, len);
+  }
+}
+
+/* Upper-case the first character of a word that is available both as the
+ * byte string p and as the already decoded array u of nc characters.
+ * UTF-8 with nc > 0: u[0] is mapped through utfconv[], p is regenerated
+ * from u with u16_u8(), and the new byte length of p is returned.
+ * Otherwise nc is returned unchanged; in 8-bit mode the first byte of a
+ * non-empty p is replaced by its csconv[] upper-case counterpart first. */
+int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
+{
+  if (utf8) {
+    if (nc <= 0) return nc;
+
+    const unsigned short upper = utfconv[(u[0].h << 8) + u[0].l].cupper;
+    u[0].h = (unsigned char) (upper >> 8);
+    u[0].l = (unsigned char) (upper & 0x00FF);
+    u16_u8(p, MAXWORDUTF8LEN, u, nc);
+    return strlen(p);
+  }
+
+  if (*p != '\0') {
+    *p = csconv[((unsigned char)*p)].cupper;
+  }
+  return nc;
+}
+
+// Accessor for the `csconv` member (the table indexed by byte value in
+// the 8-bit case-conversion code of this class).
+struct cs_info * Hunspell::get_csconv()
+{
+  return this->csconv;
+}
+
+// Accessor for the `utfconv` member (the table indexed by 16-bit
+// character value in the UTF-8 case-conversion code of this class).
+struct unicode_info2 * Hunspell::get_utf_conv()
+{
+  return this->utfconv;
+}
+
+// Hand `word` (with its byte length and no affix string) to the hash
+// manager's put_word() and return its result; returns 0 when there is no
+// hash manager.
+int Hunspell::put_word(const char * word)
+{
+    if (!pHMgr) return 0;
+
+    const int len = strlen(word);
+    return pHMgr->put_word(word, len, NULL);
+}
+
+// Hand `word` (with its byte length) and the affix flag string `suffix`
+// to the hash manager's put_word() and return its result; returns 0 when
+// there is no hash manager.
+int Hunspell::put_word_suffix(const char * word, const char * suffix)
+{
+    if (!pHMgr) return 0;
+
+    const int len = strlen(word);
+    // put_word() takes a non-const pointer for the flag string
+    return pHMgr->put_word(word, len, const_cast<char *>(suffix));
+}
+
+// Hand `word` (with its byte length) and the sample word `pattern` to the
+// hash manager's put_word_pattern() and return its result; returns 0 when
+// there is no hash manager.
+int Hunspell::put_word_pattern(const char * word, const char * pattern)
+{
+    if (!pHMgr) return 0;
+
+    const int len = strlen(word);
+    return pHMgr->put_word_pattern(word, len, pattern);
+}
+
+// Forwards to the affix manager's get_version() and returns its result.
+const char * Hunspell::get_version()
+{
+  const char * version = pAMgr->get_version();
+  return version;
+}
+
+/* Morphological analysis of `word`.
+ *
+ * The word is cleaned with cleanword(), then the suggestion manager's
+ * suggest_morph() is asked about the cleaned word and about case /
+ * abbreviation variants chosen by the detected capitalization type.
+ * Analyses are collected in a local buffer, separated by "\n"; parts of
+ * Hungarian numbers and dash compounds are joined with "+".
+ *
+ * Returns a heap string (mystrdup() of the buffer, or the string handed
+ * back by suggest_morph() itself) that callers in this file release with
+ * free(); returns 0/NULL when there is no suggestion manager, the word is
+ * too long for the work buffers, nothing is left after cleaning, or no
+ * analysis was found.
+ */
+// XXX need UTF-8 support
+char * Hunspell::morph(const char * word)
+{
+  char cw[MAXWORDUTF8LEN + 4];
+  char wspace[MAXWORDUTF8LEN + 4];
+  if (! pSMgr) return 0;
+  // cw and wspace are fixed-size: refuse anything that could not fit
+  int wl = strlen(word);
+  if (utf8) {
+    if (wl >= MAXWORDUTF8LEN) return 0;
+  } else {
+    if (wl >= MAXWORDLEN) return 0;
+  }
+  int captype = 0;
+  int abbv = 0;
+  wl = cleanword(cw, word, &captype, &abbv);
+  if (wl == 0) {
+      // nothing left after cleaning: when abbv is set, analyze a string of
+      // abbv periods instead; otherwise give up
+      if (abbv) {
+          for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
+          cw[wl] = '\0';
+          abbv = 0;
+      } else return 0;
+  }
+
+  // NOTE(review): every strcat() into result below is unbounded; this
+  // relies on the analyses fitting into MAXLNLEN bytes - confirm.
+  char result[MAXLNLEN];
+  char * st = NULL;
+  
+  *result = '\0';
+
+  // n: scan position in cw; n2: number of '.'/',' separators seen;
+  // n3: position of the last separator
+  int n = 0;
+  int n2 = 0;
+  int n3 = 0;
+
+  // test numbers
+  // LANG_hu section: set dash information for suggestions
+  if (langnum == LANG_hu) {
+  // walk over a leading run of digits and (non-leading) '.'/',' separators
+  while ((n < wl) && 
+  	(((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
+  	n++;
+	if ((cw[n] == '.') || (cw[n] == ',')) {
+		if (((n2 == 0) && (n > 3)) || 
+			((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
+		n2++;
+		n3 = n;
+	}
+  }
+
+  // the whole word was consumed but the last separator lies more than
+  // three positions before the end: no analysis
+  if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return NULL;
+  // either the whole word is a number, or a number is followed by a sign
+  // character whose remainder passes check()
+  if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='�')) && check(cw+n))) {
+  	// keep everything but the last character of the number verbatim,
+  	// then append the analysis of that last character
+  	strcat(result, cw);
+	result[n - 1] = '\0';
+  	if (n == wl) {
+		st = pSMgr->suggest_morph(cw + n - 1);
+		if (st) {
+			strcat(result, st);
+			free(st);
+		}
+	} else {
+		// temporarily cut cw at the sign so the last digit can be
+		// analyzed alone, then analyze the sign and what follows it
+		char sign = cw[n];
+		cw[n] = '\0';
+		st = pSMgr->suggest_morph(cw + n - 1);
+		if (st) {
+			strcat(result, st);
+			free(st);
+		}
+		strcat(result, "+"); // XXX SPEC. MORPHCODE
+		cw[n] = sign;
+		st = pSMgr->suggest_morph(cw + n);
+		if (st) {
+			strcat(result, st);
+			free(st);
+		}
+	}
+	return mystrdup(result);
+  }
+  }
+  // END OF LANG_hu section
+  
+  // try the variants that belong to the detected capitalization type;
+  // each analysis found is appended to result, separated by "\n"
+  switch(captype) {
+     case NOCAP:   { 
+                     st = pSMgr->suggest_morph(cw);
+                     if (st) {
+                        strcat(result, st);
+                        free(st);
+                     }
+					 if (abbv) {
+		         		memcpy(wspace,cw,wl);
+                         *(wspace+wl) = '.';
+                         *(wspace+wl+1) = '\0';
+                         st = pSMgr->suggest_morph(wspace);
+                     	 if (st) {
+                            if (*result) strcat(result, "\n");
+                            strcat(result, st);
+                            free(st);
+						 }
+                     }
+					 break;
+                   }
+     case INITCAP: { 
+                     memcpy(wspace,cw,(wl+1));
+                     mkallsmall(wspace);
+                     st = pSMgr->suggest_morph(wspace);
+                     if (st) {
+                        strcat(result, st);
+                        free(st);
+                     }					 
+	                 st = pSMgr->suggest_morph(cw);
+                     if (st) {
+                        if (*result) strcat(result, "\n");
+                        strcat(result, st);
+                        free(st);
+                     }
+					 if (abbv) {
+		         		 memcpy(wspace,cw,wl);
+                         *(wspace+wl) = '.';
+                         *(wspace+wl+1) = '\0';
+                     	 mkallsmall(wspace);
+                         st = pSMgr->suggest_morph(wspace);
+                     	 if (st) {
+                            if (*result) strcat(result, "\n");
+                            strcat(result, st);
+                            free(st);
+						 }
+                     	 mkinitcap(wspace);
+                         st = pSMgr->suggest_morph(wspace);
+                     	 if (st) {
+                            if (*result) strcat(result, "\n");
+                            strcat(result, st);
+                            free(st);
+						 }
+                     }
+		     break;
+                   }
+     case HUHCAP: { 
+                     st = pSMgr->suggest_morph(cw);
+                     if (st) {
+                        strcat(result, st);
+                        free(st);
+                     }
+#if 0
+                     memcpy(wspace,cw,(wl+1));
+                     mkallsmall(wspace);
+                     st = pSMgr->suggest_morph(wspace);
+                     if (st) {
+                        if (*result) strcat(result, "\n");
+                        strcat(result, st);
+                        free(st);
+                     }
+#endif
+                     break;
+                 }
+     case ALLCAP: { 
+                     memcpy(wspace,cw,(wl+1));
+                     st = pSMgr->suggest_morph(wspace);
+                     if (st) {
+                        strcat(result, st);
+                        free(st);
+                     }		     
+                     mkallsmall(wspace);
+                     st = pSMgr->suggest_morph(wspace);
+                     if (st) {
+                        if (*result) strcat(result, "\n");
+                        strcat(result, st);
+                        free(st);
+                     }
+		             mkinitcap(wspace);
+		             st = pSMgr->suggest_morph(wspace);
+                     if (st) {
+                        if (*result) strcat(result, "\n");
+                        strcat(result, st);
+                        free(st);
+                     }
+					 if (abbv) {
+                     	memcpy(wspace,cw,(wl+1));
+                        *(wspace+wl) = '.';
+                        *(wspace+wl+1) = '\0';
+                        // NOTE(review): unlike the other appends, this
+                        // separator is written before st is known to be
+                        // non-NULL - confirm this is intended
+                        if (*result) strcat(result, "\n");
+                     	st = pSMgr->suggest_morph(wspace);
+                     	if (st) {
+                        	strcat(result, st);
+                        	free(st);
+                     	}		     
+                        mkallsmall(wspace);
+                        st = pSMgr->suggest_morph(wspace);
+                        if (st) {
+                          if (*result) strcat(result, "\n");
+                          strcat(result, st);
+                          free(st);
+                        }
+		                mkinitcap(wspace);
+		                st = pSMgr->suggest_morph(wspace);
+                        if (st) {
+                          if (*result) strcat(result, "\n");
+                          strcat(result, st);
+                          free(st);
+                        }
+					 }
+                     break;
+                   }
+  }
+
+  // result is an array, so only the *result test can ever be false here
+  if (result && (*result)) {
+    // word reversing wrapper for complex prefixes
+    if (complexprefixes) {
+      if (utf8) reverseword_utf(result); else reverseword(result);
+    }
+    return mystrdup(result);
+  }
+
+  // compound word with dash (HU) I18n
+  char * dash;
+  int nresult = 0;
+  // LANG_hu section: set dash information for suggestions
+  if ((langnum == LANG_hu) && (dash=(char *) strchr(cw,'-'))) {
+      // split cw at the first dash: cw is now the left part, dash+1 the right
+      *dash='\0';      
+      // examine 2 sides of the dash
+      if (dash[1] == '\0') { // base word ending with dash
+        if (spell(cw)) return pSMgr->suggest_morph(cw);
+      } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
+        if (spell(cw) && (spell("-e"))) {
+			st = pSMgr->suggest_morph(cw);
+			if (st) {
+				strcat(result, st);
+				free(st);
+			}
+			strcat(result,"+"); // XXX spec. separator in MORPHCODE
+			st = pSMgr->suggest_morph("-e");
+			if (st) {
+				strcat(result, st);
+				free(st);
+			}
+			return mystrdup(result);
+		}
+      } else {
+      // first word ending with dash: word- XXX ???
+        // temporarily restore "left-" to spell-check it with the dash,
+        // then put the split back
+        char r2 = *(dash + 1);
+        dash[0]='-';
+        dash[1]='\0';
+        nresult = spell(cw);
+        dash[1] = r2;
+        dash[0]='\0';
+        // NOTE(review): the strict comparisons below exclude '0' and '9'
+        // for a one-character right part - confirm this is intended
+        if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
+	    	((dash[1] > '0') && (dash[1] < '9')))) {
+			    // analyze both sides recursively and join them with "+"
+			    st = morph(cw);
+			    if (st) {
+			        strcat(result, st);
+				    free(st);
+			    	strcat(result,"+"); // XXX spec. separator in MORPHCODE
+			    }
+			    st = morph(dash+1);
+			    if (st) {
+				    strcat(result, st);
+				    free(st);
+			    }
+			    return mystrdup(result);			
+			}
+      }
+      // affixed number in correct word
+     if (nresult && (dash > cw) && (((*(dash-1)<='9') && 
+	 		(*(dash-1)>='0')) || (*(dash-1)=='.'))) {
+         *dash='-';
+	 n = 1;
+	 if (*(dash - n) == '.') n++;
+	 // search first not a number character to left from dash
+	 while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
+	    n++;
+	 }
+	 if ((dash - n) < cw) n--;
+	 // numbers: valami1000000-hoz
+	 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
+	 // 56-hoz, 6-hoz
+	 for(; n >= 1; n--) {
+	    if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && check(dash - n)) {
+	   	    // keep the text left of the matched number verbatim and
+	   	    // append the analysis of the number plus its suffix
+	   	    strcat(result, cw);
+		    result[dash - cw - n] = '\0';
+			st = pSMgr->suggest_morph(dash - n);
+			if (st) {
+		        strcat(result, st);
+				free(st);
+			}
+		    return mystrdup(result);			
+	    }
+	 }
+     }
+  }
+  return NULL;
+}
+
+// Append the analysis `st` (may be NULL) to the NUL-terminated buffer
+// `result` of `size` bytes, starting a new line first when `result`
+// already holds text, then release `st` with free().
+// Text that does not fit is cut off; the buffer is never overrun.
+static void mwc_append_analysis(char * result, size_t size, char * st)
+{
+  if (!st) return;
+
+  size_t used = strlen(result);
+  if (used && (used + 1 < size)) {
+    result[used++] = '\n';
+    result[used] = '\0';
+  }
+
+  size_t room = size - used - 1;
+  size_t len = strlen(st);
+  if (len > room) len = room;
+  memcpy(result + used, st, len);
+  result[used + len] = '\0';
+
+  free(st);
+}
+
+/* Morphological analysis for a (possibly misspelled) word.
+ *
+ * The word is cleaned with cleanword(), then the suggestion manager's
+ * suggest_morph_for_spelling_error() is asked about the cleaned word and
+ * about the case / abbreviation (trailing '.') variants that belong to
+ * the detected capitalization type. All analyses found are joined with
+ * "\n".
+ *
+ * Returns a heap copy (mystrdup) of the joined analyses, or 0/NULL when
+ * there is no suggestion manager, the word is too long for the work
+ * buffers, nothing is left after cleaning, or no analysis was found.
+ */
+// XXX need UTF-8 support
+char * Hunspell::morph_with_correction(const char * word)
+{
+  char cw[MAXWORDUTF8LEN + 4];
+  char wspace[MAXWORDUTF8LEN + 4];
+  if (! pSMgr) return 0;
+  // cw and wspace are fixed-size: refuse anything that could not fit
+  int wl = strlen(word);
+  if (utf8) {
+    if (wl >= MAXWORDUTF8LEN) return 0;
+  } else {
+    if (wl >= MAXWORDLEN) return 0;
+  }
+  int captype = 0;
+  int abbv = 0;
+  wl = cleanword(cw, word, &captype, &abbv);
+  if (wl == 0) return 0;
+
+  char result[MAXLNLEN];
+  *result = '\0';
+
+  switch(captype) {
+     case NOCAP:   {
+                     mwc_append_analysis(result, sizeof(result),
+                         pSMgr->suggest_morph_for_spelling_error(cw));
+                     if (abbv) {
+                         // retry with the period of the abbreviation restored
+                         memcpy(wspace,cw,wl);
+                         *(wspace+wl) = '.';
+                         *(wspace+wl+1) = '\0';
+                         mwc_append_analysis(result, sizeof(result),
+                             pSMgr->suggest_morph_for_spelling_error(wspace));
+                     }
+                     break;
+                   }
+     case INITCAP: {
+                     // lower-cased form first, then the word as written
+                     memcpy(wspace,cw,(wl+1));
+                     mkallsmall(wspace);
+                     mwc_append_analysis(result, sizeof(result),
+                         pSMgr->suggest_morph_for_spelling_error(wspace));
+                     mwc_append_analysis(result, sizeof(result),
+                         pSMgr->suggest_morph_for_spelling_error(cw));
+                     if (abbv) {
+                         memcpy(wspace,cw,wl);
+                         *(wspace+wl) = '.';
+                         *(wspace+wl+1) = '\0';
+                         mkallsmall(wspace);
+                         mwc_append_analysis(result, sizeof(result),
+                             pSMgr->suggest_morph_for_spelling_error(wspace));
+                         mkinitcap(wspace);
+                         mwc_append_analysis(result, sizeof(result),
+                             pSMgr->suggest_morph_for_spelling_error(wspace));
+                     }
+                     break;
+                   }
+     case HUHCAP: {
+                     mwc_append_analysis(result, sizeof(result),
+                         pSMgr->suggest_morph_for_spelling_error(cw));
+                     memcpy(wspace,cw,(wl+1));
+                     mkallsmall(wspace);
+                     mwc_append_analysis(result, sizeof(result),
+                         pSMgr->suggest_morph_for_spelling_error(wspace));
+                     break;
+                 }
+     case ALLCAP: {
+                     // as written, then lower-cased, then with initial capital
+                     memcpy(wspace,cw,(wl+1));
+                     mwc_append_analysis(result, sizeof(result),
+                         pSMgr->suggest_morph_for_spelling_error(wspace));
+                     mkallsmall(wspace);
+                     mwc_append_analysis(result, sizeof(result),
+                         pSMgr->suggest_morph_for_spelling_error(wspace));
+                     mkinitcap(wspace);
+                     mwc_append_analysis(result, sizeof(result),
+                         pSMgr->suggest_morph_for_spelling_error(wspace));
+                     if (abbv) {
+                        // the same three variants with the period restored;
+                        // the "\n" separator is only written when an
+                        // analysis actually follows it
+                        memcpy(wspace,cw,(wl+1));
+                        *(wspace+wl) = '.';
+                        *(wspace+wl+1) = '\0';
+                        mwc_append_analysis(result, sizeof(result),
+                            pSMgr->suggest_morph_for_spelling_error(wspace));
+                        mkallsmall(wspace);
+                        mwc_append_analysis(result, sizeof(result),
+                            pSMgr->suggest_morph_for_spelling_error(wspace));
+                        mkinitcap(wspace);
+                        mwc_append_analysis(result, sizeof(result),
+                            pSMgr->suggest_morph_for_spelling_error(wspace));
+                     }
+                     break;
+                   }
+  }
+
+  // `result` is an array, so testing the pointer is always true; test the
+  // content instead (as morph() does) so "no analysis" yields NULL
+  if (*result) return mystrdup(result);
+  return NULL;
+}
+
+/* analyze word
+ * return line count 
+ * XXX need a better data structure for morphological analysis
+ *
+ * Runs morph(word) and splits its "\n"-separated result into lines.
+ *  - word == NULL, or morph() finds nothing: returns 0.
+ *  - out == NULL: the text is handed to line_tok() and its return value is
+ *    returned.
+ *  - otherwise line k is copied, NUL-terminated and without its "\n",
+ *    into the caller-provided buffer (*out)[k], and the number of lines
+ *    written is returned. The caller must supply enough rows, each large
+ *    enough for its line: no sizes are checked here. */
+int Hunspell::analyze(char ***out, const char *word) {
+  int  n = 0;
+  if (!word) return 0;
+  char * m = morph(word);
+  if(!m) return 0;
+  if (!out) {
+    // NOTE(review): line_tok() receives the NULL `out` here - confirm it
+    // tolerates that. m is released afterwards on the assumption that
+    // line_tok() works on its own copy of the text - confirm.
+    n = line_tok(m, out);
+    free(m);
+    return n;
+  }
+
+  // without memory allocation
+  /* BUG missing buffer size checking */
+  int i, p;
+  for(p = 0, i = 0; m[i]; i++) {
+     if(m[i] == '\n' || !m[i+1]) {
+       // a line ends at i: exclude the newline itself, but keep the final
+       // character when the text simply ends here
+       int len = (m[i] == '\n') ? (i - p) : (i - p + 1);
+       strncpy((*out)[n], m + p, len);
+       (*out)[n][len] = '\0'; // strncpy() does not terminate a cut copy
+       n++;                   // exactly one row per line
+       if(!m[i+1]) break;
+       p = i + 1;	 
+     }
+  }
+  free(m);
+  return n;
+}
+
diff --git a/src/myspell/hunspell.dsp b/src/myspell/hunspell.dsp
new file mode 100644
index 0000000..05e072f
--- /dev/null
+++ b/src/myspell/hunspell.dsp
@@ -0,0 +1,164 @@
+# Microsoft Developer Studio Project File - Name="hunspell" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Static Library" 0x0104
+
+CFG=hunspell - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE 
+!MESSAGE NMAKE /f "hunspell.mak".
+!MESSAGE 
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE 
+!MESSAGE NMAKE /f "hunspell.mak" CFG="hunspell - Win32 Debug"
+!MESSAGE 
+!MESSAGE Possible choices for configuration are:
+!MESSAGE 
+!MESSAGE "hunspell - Win32 Release" (based on "Win32 (x86) Static Library")
+!MESSAGE "hunspell - Win32 Debug" (based on "Win32 (x86) Static Library")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "hunspell - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "W32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
+# ADD CPP /nologo /W3 /GX /O2 /D "W32" /D "NDEBUG" /D "_MBCS" /D "_LIB" /YX /FD /c
+# ADD BASE RSC /l 0x40e /d "NDEBUG"
+# ADD RSC /l 0x40e /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LIB32=link.exe -lib
+# ADD BASE LIB32 /nologo
+# ADD LIB32 /nologo
+
+!ELSEIF  "$(CFG)" == "hunspell - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "W32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ  /c
+# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /D "W32" /D "_DEBUG" /D "_MBCS" /D "_LIB" /YX /FD /GZ  /c
+# ADD BASE RSC /l 0x40e /d "_DEBUG"
+# ADD RSC /l 0x40e /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LIB32=link.exe -lib
+# ADD BASE LIB32 /nologo
+# ADD LIB32 /nologo
+
+!ENDIF 
+
+# Begin Target
+
+# Name "hunspell - Win32 Release"
+# Name "hunspell - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\affentry.cxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\affixmgr.cxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\csutil.cxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\dictmgr.cxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\hashmgr.cxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\hunspell.cxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\suggestmgr.cxx
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\affentry.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\affixmgr.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\atypes.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\baseaffix.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\csutil.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\dictmgr.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\hashmgr.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\htypes.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\langnum.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\hunspell.hxx
+# End Source File
+# Begin Source File
+
+SOURCE=.\suggestmgr.hxx
+# End Source File
+# End Group
+# End Target
+# End Project
diff --git a/src/myspell/hunspell.hxx b/src/myspell/hunspell.hxx
new file mode 100644
index 0000000..5860fa8
--- /dev/null
+++ b/src/myspell/hunspell.hxx
@@ -0,0 +1,142 @@
+#include "hashmgr.hxx"
+#include "affixmgr.hxx"
+#include "suggestmgr.hxx"
+#include "csutil.hxx"
+#include "langnum.hxx"
+
+#define NOCAP   0
+#define INITCAP 1
+#define ALLCAP  2
+#define HUHCAP  3
+#define HUHINITCAP  4
+
+#define MAXSUGGESTION 15
+#define MAXSHARPS 5
+
+#ifdef W32
+#define DLLTEST2_API __declspec(dllexport)
+#endif
+
+#ifndef _MYSPELLMGR_HXX_
+#define _MYSPELLMGR_HXX_
+
+/* Hunspell - spell checker front end.
+ * Owns pointers to an affix manager, a hash manager and a suggestion
+ * manager and offers spell checking, suggestions, custom-dictionary
+ * additions and (experimental) morphological analysis and stemming.
+ * On W32 builds the class is declared with DLLTEST2_API. */
+#ifdef W32
+class DLLTEST2_API Hunspell
+#else
+class Hunspell
+#endif
+{
+  AffixMgr*       pAMgr;    /* affix manager */
+  HashMgr*        pHMgr;    /* hash manager; target of the put_word*() calls */
+  SuggestMgr*     pSMgr;    /* suggestion manager */
+  char *          encoding; /* value returned by get_dic_encoding() */
+  struct cs_info * csconv;  /* value returned by get_csconv() */
+  struct unicode_info2 * utfconv; /* value returned by get_utf_conv() */
+  int             langnum;  /* language number, compared with the LANG_* constants */
+  int             utf8;     /* non-zero: words are handled as UTF-8 */
+  int             complexprefixes; /* non-zero: results are passed through the word reversing wrapper */
+  char**          wordbreak;
+
+/* XXX not stateless variables for compound handling */
+  char *	  prevroot;
+  int             prevcompound;
+
+/* forbidden_compound:
+ * 0 = not forbidden
+ * 1 = forbidden
+ * 2 = forbidden compound (written without dash in Hungarian)
+ */
+  int		  forbidden_compound;
+  
+
+public:
+
+  /* Hunspell(aff, dic) - constructor of Hunspell class
+   * input: path of affix file and dictionary file
+   */
+  
+  Hunspell(const char * affpath, const char * dpath);
+
+  ~Hunspell();
+
+  /* spell(word) - spellcheck word
+   * output: 0 = bad word, not 0 = good word
+   */
+   
+  int spell(const char *);
+
+  /* suggest(suggestions, word) - search suggestions
+   * input: pointer to an array of strings pointer and the (bad) word
+   *   array of strings pointer (here *slst) may not be initialized
+   * output: number of suggestions in string array, and suggestions in
+   *   a newly allocated array of strings (*slst will be NULL when number
+   *   of suggestion equals 0.)
+   */
+
+  int suggest(char*** slst, const char * word);
+
+  /* handling custom dictionary */
+
+  int put_word(const char * word);
+
+  /* suffix is an affix flag string, similarly in dictionary files */
+  
+  int put_word_suffix(const char * word, const char * suffix);
+  
+  /* pattern is a sample dictionary word 
+   * put word into custom dictionary with affix flags of pattern word
+   */
+  
+  int put_word_pattern(const char * word, const char * pattern);
+
+  /* other */
+
+  /* accessors: each returns a pointer owned by this object or by one of
+   * its managers (presumably not to be freed by the caller - confirm) */
+  char * get_dic_encoding();
+  const char * get_wordchars();
+  unsigned short * get_wordchars_utf16(int * len);
+  struct cs_info * get_csconv();
+  struct unicode_info2 * get_utf_conv();
+  const char * get_version();
+
+  /* experimental functions */
+
+  /* morphological analysis */
+  
+  char * morph(const char * word);
+  int analyze(char*** out, const char *word);
+
+  char * morph_with_correction(const char * word);
+
+  /* stemmer function */
+  
+  int stem(char*** slst, const char * word);
+
+  /* spec. suggestions */
+  int suggest_auto(char*** slst, const char * word);
+  int suggest_pos_stems(char*** slst, const char * word);
+  char * get_possible_root();
+
+  /* not threadsafe functions for Hunspell command line API */
+  
+  char * get_prevroot();
+  int get_prevcompound();
+  int get_forbidden_compound();
+
+private:
+   /* internal helpers: word cleaning, case conversion (the *2 variants
+    * additionally take the word as a w_char array of nc characters),
+    * dictionary lookup and suggestion list handling */
+   int    cleanword(char *, const char *, int * pcaptype, int * pabbrev);
+   int    cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev);
+   void   mkinitcap(char *);
+   int    mkinitcap2(char * p, w_char * u, int nc);
+   void   mkallcap(char *);
+   int    mkallcap2(char * p, w_char * u, int nc);
+   void   mkallsmall(char *);
+   int    mkallsmall2(char * p, w_char * u, int nc);
+   struct hentry * check(const char *);
+   char * sharps_u8_l1(char * dest, char * source);
+   hentry * spellsharps(char * base, char *, int, int, char * tmp);
+   int    is_keepcase(const hentry * rv);
+   int    insert_sug(char ***slst, char * word, int *ns);
+
+};
+
+#endif
diff --git a/src/myspell/myspell.cxx b/src/myspell/myspell.cxx
deleted file mode 100644
index fcdbaa1..0000000
--- a/src/myspell/myspell.cxx
+++ /dev/null
@@ -1,302 +0,0 @@
-#include "license.readme"
-
-#include <cstring>
-#include <cstdlib>
-#include <cstdio>
-
-#include "enchant_myspell.hxx"
-
-#ifndef WINDOWS
-using namespace std;
-#endif
-
-
-MySpell::MySpell(const char * affpath, const char * dpath)
-{
-    encoding = NULL;
-    csconv = NULL;
-
-    /* first set up the hash manager */
-    pHMgr = new HashMgr(dpath);
-
-    /* next set up the affix manager */
-    /* it needs access to the hash manager lookup methods */
-    pAMgr = new AffixMgr(affpath,pHMgr);
-
-    /* get the preferred try string and the dictionary */
-    /* encoding from the Affix Manager for that dictionary */
-    char * try_string = pAMgr->get_try_string();
-    encoding = pAMgr->get_encoding();
-    csconv = get_current_cs(encoding);
-
-    /* and finally set up the suggestion manager */
-    maxSug = 15;
-    pSMgr = new SuggestMgr(try_string, maxSug, pAMgr);
-    if (try_string) free(try_string);
-}
-
-
-MySpell::~MySpell()
-{
-    if (pSMgr) delete pSMgr;
-    if (pAMgr) delete pAMgr;
-    if (pHMgr) delete pHMgr;
-    pSMgr = NULL;
-    pAMgr = NULL;
-    pHMgr = NULL;
-    csconv= NULL;
-    if (encoding) free(encoding);
-    encoding = NULL;
-}
-
-
-// make a copy of src at destination while removing all leading
-// blanks and removing any trailing periods after recording
-// their presence with the abbreviation flag
-// also since already going through character by character, 
-// set the capitalization type
-// return the length of the "cleaned" word
-
-int MySpell::cleanword(char * dest, const char * src, int * pcaptype, int * pabbrev)
-{ 
-
-  // with the new breakiterator code this should not be needed anymore
-   const char * special_chars = "._#$%&()* +,-/:;<=>[]\\^`{|}~\t \x0a\x0d\x01\'\"";
-
-   unsigned char * p = (unsigned char *) dest;
-   const unsigned char * q = (const unsigned char * ) src;
-
-   // first skip over any leading special characters
-   while ((*q != '\0') && (strchr(special_chars,(int)(*q)))) q++;
-   
-   // now strip off any trailing special characters 
-   // if a period comes after a normal char record its presence
-   *pabbrev = 0;
-   int nl = strlen((const char *)q);
-   while ((nl > 0) && (strchr(special_chars,(int)(*(q+nl-1))))) {
-       nl--;
-   }
-   if ( *(q+nl) == '.' ) *pabbrev = 1;
-   
-   // if no characters are left it can't be an abbreviation and can't be capitalized
-   if (nl <= 0) { 
-       *pcaptype = NOCAP;
-       *pabbrev = 0;
-       *p = '\0';
-       return 0;
-   }
-
-   // now determine the capitalization type of the first nl letters
-   int ncap = 0;
-   int nneutral = 0;
-   int nc = 0;
-   while (nl > 0) {
-       nc++;
-       if (csconv[(*q)].ccase) ncap++;
-       if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
-       *p++ = *q++;
-       nl--;
-   }
-   // remember to terminate the destination string
-   *p = '\0';
-
-   // now finally set the captype
-   if (ncap == 0) {
-        *pcaptype = NOCAP;
-   } else if ((ncap == 1) && csconv[(unsigned char)(*dest)].ccase) {
-        *pcaptype = INITCAP;
-  } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
-        *pcaptype = ALLCAP;
-  } else {
-        *pcaptype = HUHCAP;
-  }
-  return nc;
-} 
-       
-
-int MySpell::spell(const char * word)
-{
-  char * rv=NULL;
-  char cw[MAXWORDLEN+1];
-  char wspace[MAXWORDLEN+1];
-
-  int wl = strlen(word);
-  if (wl > (MAXWORDLEN - 1)) return 0;
-  int captype = 0;
-  int abbv = 0;
-  wl = cleanword(cw, word, &captype, &abbv);
-  if (wl == 0) return 1;
-
-  switch(captype) {
-     case HUHCAP:
-     case NOCAP:   { 
-                     rv = check(cw); 
-                     if ((abbv) && !(rv)) {
-		         memcpy(wspace,cw,wl);
-                         *(wspace+wl) = '.';
-                         *(wspace+wl+1) = '\0';
-                         rv = check(wspace);
-                     }
-                     break;
-                   }
-
-     case ALLCAP:  {
-                     memcpy(wspace,cw,(wl+1));
-                     mkallsmall(wspace, csconv);
-                     rv = check(wspace);
-                     if (!rv) {
-                        mkinitcap(wspace, csconv);
-                        rv = check(wspace);
-                     }
-                     if (!rv) rv = check(cw);
-                     if ((abbv) && !(rv)) {
-		         memcpy(wspace,cw,wl);
-                         *(wspace+wl) = '.';
-                         *(wspace+wl+1) = '\0';
-                         rv = check(wspace);
-                     }
-                     break; 
-                   }
-     case INITCAP: { 
-                     memcpy(wspace,cw,(wl+1));
-                     mkallsmall(wspace, csconv);
-                     rv = check(wspace);
-                     if (!rv) rv = check(cw);
-                     if ((abbv) && !(rv)) {
-		         memcpy(wspace,cw,wl);
-                         *(wspace+wl) = '.';
-                         *(wspace+wl+1) = '\0';
-                         rv = check(wspace);
-                     }
-                     break; 
-                   }
-  }
-  if (rv) return 1;
-  return 0;
-}
-
-
-char * MySpell::check(const char * word)
-{
-  struct hentry * he = NULL;
-  if (pHMgr)
-     he = pHMgr->lookup (word);
-
-  if ((he == NULL) && (pAMgr)) {
-     // try stripping off affixes */
-     he = pAMgr->affix_check(word, strlen(word));
-
-     // try check compound word
-     if ((he == NULL) && (pAMgr->get_compound())) {
-          he = pAMgr->compound_check(word, strlen(word), (pAMgr->get_compound())[0]);
-     }
-
-  }
-
-  if (he) return he->word;
-  return NULL;
-}
-
-
-
-int MySpell::suggest(char*** slst, const char * word)
-{
-  char cw[MAXWORDLEN+1];
-  char wspace[MAXWORDLEN+1];
-  if (! pSMgr) return 0;
-  int wl = strlen(word);
-  if (wl > (MAXWORDLEN-1)) return 0;
-  int captype = 0;
-  int abbv = 0;
-  wl = cleanword(cw, word, &captype, &abbv);
-  if (wl == 0) return 0;
-
-  int ns = 0;
-  char ** wlst = (char **) calloc(maxSug, sizeof(char *));
-  if (wlst == NULL) return 0;
-
-  switch(captype) {
-     case NOCAP:   { 
-                     ns = pSMgr->suggest(wlst, ns, cw); 
-                     break;
-                   }
-
-     case INITCAP: { 
-
-                     memcpy(wspace,cw,(wl+1));
-                     mkallsmall(wspace, csconv);
-                     ns = pSMgr->suggest(wlst, ns, wspace);
-                     if (ns > 0) {
-                       for (int j=0; j < ns; j++)
-                         mkinitcap(wlst[j], csconv);
-                     }
-                     ns = pSMgr->suggest(wlst,ns,cw); 
-                     break;
-                   }
-
-     case HUHCAP: { 
-                     ns = pSMgr->suggest(wlst, ns, cw);
-                     if (ns != -1) {
-                       memcpy(wspace,cw,(wl+1));
-                       mkallsmall(wspace, csconv);
-                       ns = pSMgr->suggest(wlst, ns, wspace);
-                     } 
-                     break;
-                   }
-
-     case ALLCAP: { 
-                     memcpy(wspace,cw,(wl+1));
-                     mkallsmall(wspace, csconv);
-                     ns = pSMgr->suggest(wlst, ns, wspace);
-                     if (ns > 0) {
-                       for (int j=0; j < ns; j++)
-                         mkallcap(wlst[j], csconv);
-                     } 
-                     if (ns != -1) 
-                         ns = pSMgr->suggest(wlst, ns , cw);
-                     break;
-                   }
-  }
-  if (ns > 0) {
-       *slst = wlst;
-       return ns;
-  }
-  // try ngram approach since found nothing
-  if (ns == 0) { 
-     ns = pSMgr->ngsuggest(wlst, cw, pHMgr);
-     if (ns) {
-         switch(captype) {
-	    case NOCAP:  break;
-            case HUHCAP: break; 
-            case INITCAP: { 
-                            for (int j=0; j < ns; j++)
-                              mkinitcap(wlst[j], csconv);
-                          }
-                          break;
-
-            case ALLCAP: { 
-                            for (int j=0; j < ns; j++)
-                              mkallcap(wlst[j], csconv);
-                         } 
-                         break;
-	 }
-         *slst = wlst;
-         return ns;
-     }
-  }
-  if (ns < 0) {
-     // we ran out of memory - we should free up as much as possible
-     for (int i=0;i<maxSug; i++)
-	 if (wlst[i] != NULL) free(wlst[i]);
-  }
-  if (wlst) free(wlst);
-  *slst = NULL;
-  return 0;
-}
-
-
-char * MySpell::get_dic_encoding()
-{
-  return encoding;
-}
-
diff --git a/src/myspell/myspell_checker.cpp b/src/myspell/myspell_checker.cpp
index f84358a..01e8845 100644
--- a/src/myspell/myspell_checker.cpp
+++ b/src/myspell/myspell_checker.cpp
@@ -38,11 +38,8 @@
 #include "enchant.h"
 #include "enchant-provider.h"
 
-#ifdef WITH_SYSTEM_MYSPELL
-#include <myspell.hxx>
-#else
-#include "enchant_myspell.hxx"
-#endif
+/* built against hunspell 1.1.3 on January 13, 2006 */
+#include "hunspell.hxx"
 
 ENCHANT_PLUGIN_DECLARE("Myspell")
 
@@ -66,7 +63,7 @@ public:
 private:
 	GIConv  m_translate_in; /* Selected translation from/to Unicode */
 	GIConv  m_translate_out;
-	MySpell *myspell;
+	Hunspell *myspell;
 };
 
 /***************************************************************************/
@@ -257,7 +254,7 @@ MySpellChecker::requestDictionary(const char *szLang)
 	aff = g_strdup(dic);
 	int len_dic = strlen(dic);
 	strcpy(aff+len_dic-3, "aff");
-	myspell = new MySpell(aff, dic);
+	myspell = new Hunspell(aff, dic);
 	g_free(dic);
 	g_free(aff);
 	char *enc = myspell->get_dic_encoding();
diff --git a/src/myspell/suggestmgr.cxx b/src/myspell/suggestmgr.cxx
index 4e9c051..fe451cc 100644
--- a/src/myspell/suggestmgr.cxx
+++ b/src/myspell/suggestmgr.cxx
@@ -1,4 +1,5 @@
-#include "license.readme"
+#include "license.hunspell"
+#include "license.myspell"
 
 #include <cstdlib>
 #include <cctype>
@@ -7,12 +8,10 @@
 
 #include "suggestmgr.hxx"
 
-#ifndef WINDOWS
+#ifndef W32
 using namespace std;
 #endif
 
-extern char * mystrdup(const char *);
-
 
 SuggestMgr::SuggestMgr(const char * tryme, int maxn, 
                        AffixMgr * aptr)
@@ -21,13 +20,41 @@ SuggestMgr::SuggestMgr(const char * tryme, int maxn,
   // register affix manager and check in string of chars to 
   // try when building candidate suggestions
   pAMgr = aptr;
-  ctry = mystrdup(tryme);
+
   ctryl = 0;
-  if (ctry)
-    ctryl = strlen(ctry);
+  ctry = NULL;
+  ctry_utf = NULL;
+
   maxSug = maxn;
-  nosplitsugs=(0==1);
-  if (pAMgr) pAMgr->get_nosplitsugs();
+  nosplitsugs = 0;
+  maxngramsugs = MAXNGRAMSUGS;
+
+  utf8 = 0;
+  utfconv = NULL;
+  complexprefixes = 0;
+
+  if (pAMgr) {
+        char * enc = pAMgr->get_encoding();
+        csconv = get_current_cs(enc);
+        free(enc);
+	nosplitsugs = pAMgr->get_nosplitsugs();
+        if (pAMgr->get_maxngramsugs() >= 0) maxngramsugs = pAMgr->get_maxngramsugs();
+        utf8 = pAMgr->get_utf8();
+        utfconv = pAMgr->get_utf_conv();
+        complexprefixes = pAMgr->get_complexprefixes();
+  }
+
+  if (tryme) {  
+    if (utf8) {
+        w_char t[MAXSWL];    
+        ctryl = u8_u16(t, MAXSWL, tryme);
+        ctry_utf = (w_char *) malloc(ctryl * sizeof(w_char));
+        memcpy(ctry_utf, t, ctryl * sizeof(w_char));
+    } else {
+        ctry = mystrdup(tryme);
+        ctryl = strlen(ctry);
+    }
+  }
 }
 
 
@@ -36,6 +63,8 @@ SuggestMgr::~SuggestMgr()
   pAMgr = NULL;
   if (ctry) free(ctry);
   ctry = NULL;
+  if (ctry_utf) free(ctry_utf);
+  ctry_utf = NULL;
   ctryl = 0;
   maxSug = 0;
 }
@@ -45,67 +74,182 @@ SuggestMgr::~SuggestMgr()
 // generate suggestions for a mispelled word
 //    pass in address of array of char * pointers
 
-int SuggestMgr::suggest(char** wlst, int ns, const char * word)
+int SuggestMgr::suggest(char*** slst, const char * w, int nsug)
 {
+    int nocompoundtwowords = 0;
+    char ** wlst;    
+    w_char word_utf[MAXSWL];
+    int wl;
+
+  char w2[MAXWORDUTF8LEN];
+  const char * word = w;
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    strcpy(w2, w);
+    if (utf8) reverseword_utf(w2); else reverseword(w2);
+    word = w2;
+  }
     
-    int nsug = ns;
+    if (*slst) {
+	wlst = *slst;
+    } else {
+	wlst = (char **) malloc(maxSug * sizeof(char *));
+	if (wlst == NULL) return -1;
+        for (int i = 0; i < maxSug; i++) wlst[i] = NULL;
+    }
+    
+    if (utf8) {
+        wl = u8_u16(word_utf, MAXSWL, word);
+    }
 
-    // perhaps we made chose the wrong char from a related set
-    if ((nsug < maxSug) && (nsug > -1))
-      nsug = mapchars(wlst, word, nsug);
+    for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest++) {
 
     // perhaps we made a typical fault of spelling
     if ((nsug < maxSug) && (nsug > -1))
-      nsug = replchars(wlst, word, nsug);
+    nsug = replchars(wlst, word, nsug, cpdsuggest);
 
-    // did we forget to add a char
+    // perhaps we chose the wrong char from a related set
     if ((nsug < maxSug) && (nsug > -1))
-      nsug = forgotchar(wlst, word, nsug);
+      nsug = mapchars(wlst, word, nsug, cpdsuggest);
 
     // did we swap the order of chars by mistake
-    if ((nsug < maxSug) && (nsug > -1))
-      nsug = swapchar(wlst, word, nsug);
+    if ((nsug < maxSug) && (nsug > -1)) {
+        nsug = (utf8) ? swapchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
+                    swapchar(wlst, word, nsug, cpdsuggest);
+    }
+
+    // did we forget to add a char
+    if ((nsug < maxSug) && (nsug > -1)) {
+        nsug = (utf8) ? forgotchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
+                    forgotchar(wlst, word, nsug, cpdsuggest);
+    }
 
     // did we add a char that should not be there
-    if ((nsug < maxSug) && (nsug > -1))
-      nsug = extrachar(wlst, word, nsug);
-   
+    if ((nsug < maxSug) && (nsug > -1)) {
+        nsug = (utf8) ? extrachar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
+                    extrachar(wlst, word, nsug, cpdsuggest);
+    }
+
     // did we just hit the wrong key in place of a good char
-    if ((nsug < maxSug) && (nsug > -1))
-      nsug = badchar(wlst, word, nsug);
+    if ((nsug < maxSug) && (nsug > -1)) {
+        nsug = (utf8) ? badchar_utf(wlst, word_utf, wl, nsug, cpdsuggest) :
+                    badchar(wlst, word, nsug, cpdsuggest);
+    }
+
+    // only suggest compound words when no other suggestion
+    if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1;
 
     // perhaps we forgot to hit space and two words ran together
-    if (!nosplitsugs) {
-        if ((nsug < maxSug) && (nsug > -1))
-           nsug = twowords(wlst, word, nsug);
+    if ((!nosplitsugs) && (nsug < maxSug) && (nsug > -1)) {
+   		nsug = twowords(wlst, word, nsug, cpdsuggest);
+	}
+
+    } // repeating ``for'' statement compounding support
+
+    if (nsug < 0) {
+     // we ran out of memory - we should free up as much as possible
+       for (int i = 0; i < maxSug; i++)
+	 if (wlst[i] != NULL) free(wlst[i]);
+       free(wlst);
+       wlst = NULL;
     }
+
+    *slst = wlst;
     return nsug;
 }
 
+// Generate suggestions for a word with a typical mistake, using only the
+// replacement table, the related-character map and two-word splitting.
+// *slst is reused when non-NULL; otherwise maxSug slots are allocated here.
+int SuggestMgr::suggest_auto(char*** slst, const char * w, int nsug)
+{
+    int nocompoundtwowords = 0;
+    char ** wlst;
+
+    char w2[MAXWORDUTF8LEN];
+    const char * word = w;
+    // word reversing wrapper for complex prefixes
+    // NOTE(review): strcpy is unbounded; confirm callers keep w below MAXWORDUTF8LEN
+    if (complexprefixes) {
+        strcpy(w2, w);
+        if (utf8) reverseword_utf(w2); else reverseword(w2);
+        word = w2;
+    }
+
+    if (*slst) {
+        wlst = *slst;
+    } else {
+        wlst = (char **) malloc(maxSug * sizeof(char *));
+        if (wlst == NULL) return -1;
+        // NULL every slot: the failure cleanup below frees all non-NULL slots
+        for (int i = 0; i < maxSug; i++) wlst[i] = NULL;
+    }
+
+    for (int cpdsuggest=0; (cpdsuggest<2) && (nocompoundtwowords==0); cpdsuggest++) {
+        // perhaps we made a typical fault of spelling
+        if ((nsug < maxSug) && (nsug > -1))
+            nsug = replchars(wlst, word, nsug, cpdsuggest);
+
+        // perhaps we chose the wrong char from a related set
+        if ((nsug < maxSug) && (nsug > -1) && (cpdsuggest == 0))
+            nsug = mapchars(wlst, word, nsug, cpdsuggest);
+
+        // only suggest compound words when no other suggestion
+        if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1;
+        // perhaps we forgot to hit space and two words ran together
+        if ((nsug < maxSug) && (nsug > -1) && check_forbidden(word, strlen(word)))
+            nsug = twowords(wlst, word, nsug, cpdsuggest);
+    } // repeating ``for'' statement compounding support
+
+    if (nsug < 0) {
+        // a helper reported failure (-1): release the list and do not hand it back
+        for (int i = 0; i < maxSug; i++)
+            if (wlst[i] != NULL) free(wlst[i]);
+        free(wlst);
+        *slst = NULL;
+        return -1;
+    }
+
+    *slst = wlst;
+    return nsug;
+}
 
 
 // suggestions for when chose the wrong char out of a related set
-int SuggestMgr::mapchars(char** wlst, const char * word, int ns)
+int SuggestMgr::mapchars(char** wlst, const char * word, int ns, int cpdsuggest)
 {
+  time_t timelimit;
+  int timer;
+  
   int wl = strlen(word);
   if (wl < 2 || ! pAMgr) return ns;
 
   int nummap = pAMgr->get_nummap();
   struct mapentry* maptable = pAMgr->get_maptable();
   if (maptable==NULL) return ns;
-  ns = map_related(word, 0, wlst, ns, maptable, nummap);
+
+  timelimit = time(NULL);
+  timer = MINTIMER;
+  if (utf8) {
+    w_char w[MAXSWL];
+    int len = u8_u16(w, MAXSWL, word);
+    ns = map_related_utf(w, len, 0, wlst, ns, maptable, nummap, &timer, &timelimit);
+  } else ns = map_related(word, 0, wlst, ns, maptable, nummap, &timer, &timelimit);
   return ns;
 }
 
-
-int SuggestMgr::map_related(const char * word, int i, char** wlst, int ns, const mapentry* maptable, int nummap) 
+int SuggestMgr::map_related(const char * word, int i, char** wlst, int ns,
+    const mapentry* maptable, int nummap, int * timer, time_t * timelimit)
 {
-  char c = *(word + i);
+  char c = *(word + i);  
   if (c == 0) {
       int cwrd = 1;
+      int wl;
       for (int m=0; m < ns; m++)
 	  if (strcmp(word,wlst[m]) == 0) cwrd = 0;
-      if ((cwrd) && check(word,strlen(word))) {
+      if ((cwrd) && (wl = strlen(word)) && (check(word, wl, 0, timer, timelimit) || 
+        check(word, wl, 1, timer, timelimit))) {
 	  if (ns < maxSug) {
 	      wlst[ns] = mystrdup(word);
 	      if (wlst[ns] == NULL) return -1;
@@ -121,14 +265,55 @@ int SuggestMgr::map_related(const char * word, int i, char** wlst, int ns, const
       char * newword = mystrdup(word);
       for (int k = 0; k < maptable[j].len; k++) {
 	*(newword + i) = *(maptable[j].set + k);
-	ns = map_related(newword, (i+1), wlst, ns, maptable, nummap);
+	ns = map_related(newword, (i+1), wlst, ns, maptable, nummap, timer, timelimit);
+        if (!(*timelimit)) return ns;
       }
       free(newword);
     }
   }
   if (!in_map) {
      i++;
-     ns = map_related(word, i, wlst, ns, maptable, nummap);
+     ns = map_related(word, i, wlst, ns, maptable, nummap, timer, timelimit);
+  }
+  return ns;
+}
+
+// UTF-16 twin of map_related(): from position i on, substitutes every member of
+// each maptable set holding the character; returns the new count, or -1 when mystrdup() fails.
+int SuggestMgr::map_related_utf(w_char * word, int len, int i, char** wlst, int ns,
+    const mapentry* maptable, int nummap, int * timer, time_t * timelimit) 
+{
+  if (i == len) {
+      int cwrd = 1, wl;
+      char s[MAXSWUTF8L];
+      u16_u8(s, MAXSWUTF8L, word, len);
+      for (int m=0; m < ns; m++)
+	  if (strcmp(s,wlst[m]) == 0) cwrd = 0;
+      if ((cwrd) && (wl = strlen(s)) && (check(s, wl, 0, timer, timelimit) || check(s, wl, 1, timer, timelimit))) {
+	  if (ns >= maxSug) return ns;
+	  wlst[ns] = mystrdup(s);
+	  if (wlst[ns] == NULL) return -1;
+	  ns++;
+      }
+      return ns;
+  } 
+  int in_map = 0;
+  unsigned short c = *((unsigned short *) word + i);
+  for (int j = 0; j < nummap; j++) {
+    if (flag_bsearch((unsigned short *) maptable[j].set_utf16, c, maptable[j].len)) {
+      in_map = 1;
+      for (int k = 0; k < maptable[j].len; k++) {
+	*(word + i) = *(maptable[j].set_utf16 + k);
+	ns = map_related_utf(word, len, i + 1, wlst, ns, maptable, nummap, timer, timelimit);
+        // stop on failure too: recursing with ns == -1 would store into wlst[-1]
+        if ((ns < 0) || !(*timelimit)) return ns;
+      }
+      *((unsigned short *) word + i) = c;
+    }
+  }
+  if (!in_map) {
+     i++;
+     ns = map_related_utf(word, len, i, wlst, ns, maptable, nummap, timer, timelimit);
   }
   return ns;
 }
@@ -137,9 +322,9 @@ int SuggestMgr::map_related(const char * word, int i, char** wlst, int ns, const
 
 // suggestions for a typical fault of spelling, that
 // differs with more, than 1 letter from the right form.
-int SuggestMgr::replchars(char** wlst, const char * word, int ns)
+int SuggestMgr::replchars(char** wlst, const char * word, int ns, int cpdsuggest)
 {
-  char candidate[MAXSWL];
+  char candidate[MAXSWUTF8L];
   const char * r;
   int lenr, lenp;
   int cwrd;
@@ -153,21 +338,24 @@ int SuggestMgr::replchars(char** wlst, const char * word, int ns)
 
   for (int i=0; i < numrep; i++ ) {
       r = word;
-      lenr = strlen(reptable[i].replacement);
+      lenr = strlen(reptable[i].pattern2);
       lenp = strlen(reptable[i].pattern);
       // search every occurence of the pattern in the word
       while ((r=strstr(r, reptable[i].pattern)) != NULL) {
 	  strcpy(candidate, word);
-	  if (r-word + lenr + strlen(r+lenp) >= MAXSWL) break;
-	  strcpy(candidate+(r-word),reptable[i].replacement);
+	  if (r-word + lenr + strlen(r+lenp) >= MAXSWUTF8L) break;
+	  strcpy(candidate+(r-word),reptable[i].pattern2);
 	  strcpy(candidate+(r-word)+lenr, r+lenp);
           cwrd = 1;
           for (int k=0; k < ns; k++)
 	      if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
-          if ((cwrd) && check(candidate,strlen(candidate))) {
+          if ((cwrd) && check(candidate,strlen(candidate), cpdsuggest, NULL, NULL)) {
 	      if (ns < maxSug) {
 		  wlst[ns] = mystrdup(candidate);
-		  if (wlst[ns] == NULL) return -1;
+		  if (wlst[ns] == NULL) {
+		      for (int j=0; j<ns; j++) free(wlst[j]);
+		      return -1;
+		  }
 		  ns++;
 	      } else return ns;
 	  }
@@ -177,16 +365,56 @@ int SuggestMgr::replchars(char** wlst, const char * word, int ns)
    return ns;
 }
 
+// Suggest the word with an accidentally doubled letter pair removed, e.g.
+// "vacacation" -> "vacation" (doubled `ac'). Returns the updated suggestion
+// count, or -1 when a suggestion cannot be duplicated.
+int SuggestMgr::doubledsyllable(char** wlst, const char * word, int ns, int cpdsuggest)
+{
+  char candidate[MAXSWUTF8L];
+  int state=0;   // length of the current run of word[i]==word[i-2] matches
+  int cwrd;
+
+  int wl = strlen(word);
+  // nothing to do for short words; over-long ones would overflow candidate
+  if (wl < 5 || wl >= MAXSWUTF8L || ! pAMgr) return ns;
+  for (int i=2; i < wl; i++ ) {
+      if (word[i]==word[i-2]) {
+          state++;
+          if (state==3) {
+            strcpy(candidate,word);
+            strcpy(candidate+i-1,word+i+1);   // cut out word[i-1] and word[i]
+            cwrd = 1;
+            for (int k=0; k < ns; k++)
+                if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
+            if ((cwrd) && check(candidate,strlen(candidate), cpdsuggest, NULL, NULL)) {
+                if (ns < maxSug) {
+                    wlst[ns] = mystrdup(candidate);
+                    if (wlst[ns] == NULL) {
+                        // NULL the freed slots so a caller that frees every non-NULL entry cannot double-free
+                        for (int j=0; j<ns; j++) { free(wlst[j]); wlst[j] = NULL; }
+                        return -1;
+                    }
+                    ns++;
+                } else return ns;
+            }
+            state=0;
+          }
+      } else state=0;
+  }
+  return ns;
+}
 
 // error is wrong char in place of correct one
-int SuggestMgr::badchar(char ** wlst, const char * word, int ns)
+int SuggestMgr::badchar(char ** wlst, const char * word, int ns, int cpdsuggest)
 {
   char	tmpc;
-  char	candidate[MAXSWL];
+  char	candidate[MAXSWUTF8L];
+  time_t timelimit = time(NULL);
+  int timer = MINTIMER;
 
   int wl = strlen(word);
   int cwrd;
-  strcpy (candidate, word);
+  strcpy(candidate, word);
 
   // swap out each char one by one and try all the tryme
   // chars in its place to see if that makes a good word
@@ -198,24 +426,92 @@ int SuggestMgr::badchar(char ** wlst, const char * word, int ns)
        cwrd = 1;
        for (int k=0; k < ns; k++)
 	 if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
-       if ((cwrd) && check(candidate,wl)) {
+       if ((cwrd) && check(candidate,wl, cpdsuggest, &timer, &timelimit)) {
 	 if (ns < maxSug) {
             wlst[ns] = mystrdup(candidate);
             if (wlst[ns] == NULL) return -1;
             ns++;
          } else return ns;
        }
+       if (!timelimit) return ns;
        candidate[i] = tmpc;
     }
   }
   return ns;
 }
 
+// A wrong character was typed in place of the right one: substitute every
+// tryme character at every position and keep the spellings check() accepts.
+int SuggestMgr::badchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
+{
+  w_char  work[MAXSWL];       // UTF-16 copy of word, patched in place
+  char    cand[MAXSWUTF8L];   // UTF-8 conversion of work, as handed to check()
+  time_t  timelimit = time(NULL);
+  int     timer = MINTIMER;
+
+  memcpy(work, word, wl * sizeof(w_char));
+
+  for (int pos = 0; pos < wl; pos++) {
+    const w_char saved = work[pos];
+    for (int t = 0; t < ctryl; t++) {
+      // substituting the character that is already there changes nothing
+      if ((ctry_utf[t].l == saved.l) && (ctry_utf[t].h == saved.h)) continue;
+      work[pos] = ctry_utf[t];
+      u16_u8(cand, MAXSWUTF8L, work, wl);
+
+      int unseen = 1;
+      for (int k = 0; k < ns; k++) {
+        if (strcmp(cand, wlst[k]) == 0) unseen = 0;
+      }
+      if (unseen && check(cand, strlen(cand), cpdsuggest, &timer, &timelimit)) {
+        if (ns >= maxSug) return ns;
+        wlst[ns] = mystrdup(cand);
+        if (wlst[ns] == NULL) return -1;
+        ns++;
+      }
+      // presumably check() zeroes timelimit once its time budget is spent -- confirm
+      if (!timelimit) return ns;
+      work[pos] = saved;
+    }
+  }
+  return ns;
+}
+
+// The word has one letter too many: try every spelling obtained by leaving
+// out a single character and keep those that check() accepts.
+int SuggestMgr::extrachar_utf(char** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
+{
+   w_char shortened[MAXSWL];       // word minus the character at index `skip`
+   char   shortened8[MAXSWUTF8L];  // UTF-8 conversion of `shortened`
+
+   if (wl < 2) return ns;
+
+   // start with word[0] omitted; each pass then copies word[skip] into slot
+   // `skip`, which moves the gap one position to the right
+   memcpy(shortened, word + 1, (wl - 1) * sizeof(w_char));
+   for (int skip = 0; skip < wl; skip++) {
+       u16_u8(shortened8, MAXSWUTF8L, shortened, wl - 1);
+
+       int unseen = 1;
+       for (int k = 0; k < ns; k++) {
+           if (strcmp(shortened8, wlst[k]) == 0) unseen = 0;
+       }
+       if (unseen && check(shortened8, strlen(shortened8), cpdsuggest, NULL, NULL)) {
+           if (ns >= maxSug) return ns;
+           wlst[ns] = mystrdup(shortened8);
+           if (wlst[ns] == NULL) return -1;
+           ns++;
+       }
+       shortened[skip] = word[skip];
+   }
+
+   return ns;
+}
 
 // error is word has an extra letter it does not need 
-int SuggestMgr::extrachar(char** wlst, const char * word, int ns)
+int SuggestMgr::extrachar(char** wlst, const char * word, int ns, int cpdsuggest)
 {
-   char	   candidate[MAXSWL];
+   char	   candidate[MAXSWUTF8L];
    const char *  p;
    char *  r;
    int cwrd;
@@ -229,7 +525,7 @@ int SuggestMgr::extrachar(char** wlst, const char * word, int ns)
        cwrd = 1;
        for (int k=0; k < ns; k++)
 	 if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
-       if ((cwrd) && check(candidate,wl-1)) {
+       if ((cwrd) && check(candidate,wl-1, cpdsuggest, NULL, NULL)) {
 	 if (ns < maxSug) {
             wlst[ns] = mystrdup(candidate);
             if (wlst[ns] == NULL) return -1;
@@ -242,13 +538,15 @@ int SuggestMgr::extrachar(char** wlst, const char * word, int ns)
 }
 
 
-// error is mising a letter it needs
-int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns)
+// error is missing a letter it needs
+int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns, int cpdsuggest)
 {
-   char	candidate[MAXSWL];
+   char	candidate[MAXSWUTF8L];
    const char *	p;
    char *	q;
    int cwrd;
+   time_t timelimit = time(NULL);
+   int timer = MINTIMER;
 
    int wl = strlen(word);
 
@@ -260,13 +558,14 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns)
          cwrd = 1;
          for (int k=0; k < ns; k++)
 	   if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
-         if ((cwrd) && check(candidate,wl+1)) {
+         if ((cwrd) && check(candidate, wl+1, cpdsuggest, &timer, &timelimit)) {
 	    if (ns < maxSug) {
                 wlst[ns] = mystrdup(candidate);
                 if (wlst[ns] == NULL) return -1;
                 ns++;
             } else return ns; 
          }
+         if (!timelimit) return ns;
       }
       *q++ = *p++;
    }
@@ -277,7 +576,57 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns)
       cwrd = 1;
       for (int k=0; k < ns; k++)
 	if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
-      if ((cwrd) && check(candidate,wl+1)) {
+      if ((cwrd) && check(candidate,wl+1, cpdsuggest, NULL, NULL)) {
+	 if (ns < maxSug) {
+             wlst[ns] = mystrdup(candidate);
+             if (wlst[ns] == NULL) return -1;
+             ns++;
+         } else return ns;
+      }
+   }
+   return ns;
+}
+
+// error is missing a letter it needs
+int SuggestMgr::forgotchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
+{
+   w_char  candidate_utf[MAXSWL];
+   char    candidate[MAXSWUTF8L];
+   const w_char * p;
+   w_char * q;
+   int cwrd;
+   time_t timelimit = time(NULL);
+   int timer = MINTIMER;
+
+   // try inserting a tryme character before every letter
+   memcpy (candidate_utf + 1, word, wl * sizeof(w_char));
+   for (p = word, q = candidate_utf;  p < (word + wl); )  {
+      for (int i = 0;  i < ctryl;  i++) {
+	 *q = ctry_utf[i];
+         cwrd = 1;
+         u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1);
+         for (int k=0; k < ns; k++)
+            if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
+         if ((cwrd) && check(candidate, strlen(candidate), cpdsuggest, &timer, &timelimit)) {
+            if (ns < maxSug) {
+                wlst[ns] = mystrdup(candidate);
+                if (wlst[ns] == NULL) return -1;
+                ns++;
+            } else return ns; 
+         }
+         if (!timelimit) return ns;
+       }
+      *q++ = *p++;
+   }
+
+   // now try adding one to the end
+   for (int i = 0;  i < ctryl;  i++) {
+      *q = ctry_utf[i];
+      cwrd = 1;
+      u16_u8(candidate, MAXSWUTF8L, candidate_utf, wl + 1);
+      for (int k=0; k < ns; k++)
+	if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
+      if ((cwrd) && check(candidate, strlen(candidate), cpdsuggest, NULL, NULL)) {
 	 if (ns < maxSug) {
              wlst[ns] = mystrdup(candidate);
              if (wlst[ns] == NULL) return -1;
@@ -290,27 +639,51 @@ int SuggestMgr::forgotchar(char ** wlst, const char * word, int ns)
 
 
 /* error is should have been two words */
-int SuggestMgr::twowords(char ** wlst, const char * word, int ns)
+int SuggestMgr::twowords(char ** wlst, const char * word, int ns, int cpdsuggest)
 {
-    char candidate[MAXSWL];
+    char candidate[MAXSWUTF8L];
     char * p;
+    int c1, c2, cwrd;
+    int forbidden = 0;
 
     int wl=strlen(word);
-    if (wl < 3) return ns;
+    if (wl < 4) return ns;
+    
+    if (pAMgr->get_langnum() == LANG_hu) forbidden = check_forbidden(word, wl);
+
     strcpy(candidate + 1, word);
+    candidate[0] = word[0];
 
     // split the string into two pieces after every char
     // if both pieces are good words make them a suggestion
-    for (p = candidate + 1;  p[1] != '\0';  p++) {
+    for (p = candidate + 2;  p[2] != '\0';  p++) {
        p[-1] = *p;
+       // go to end of the UTF-8 character
+       while (utf8 && ((p[1] & 0xc0) == 0x80)) {
+         p++;
+         p[-1] = *p;
+       }
        *p = '\0';
-       if (check(candidate,strlen(candidate))) {
-	 if (check((p+1),strlen(p+1))) {
-	    *p = ' ';
+       if ((c1=check(candidate,strlen(candidate), cpdsuggest, NULL, NULL))) {
+	 if ((c2=check((p+1),strlen(p+1), cpdsuggest, NULL, NULL))) {
+            *p = ' ';
+
+            // spec. Hungarian code (need a better compound word support)
+            if ((pAMgr->get_langnum() == LANG_hu) && !forbidden &&
+	        // if 3 repeating letter, use - instead of space
+	        (((p[-1] == p[1]) && (((p>candidate+1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) ||
+	        // or multiple compounding, with more, than 6 syllables
+                ((c1 == 3) && (c2 >= 2)))) *p = '-';
+
+	    cwrd = 1;
+	    for (int k=0; k < ns; k++)
+		if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
 	    if (ns < maxSug) {
-                wlst[ns] = mystrdup(candidate);
-                if (wlst[ns] == NULL) return -1;
-                ns++;
+		if (cwrd) {
+            	    wlst[ns] = mystrdup(candidate);
+            	    if (wlst[ns] == NULL) return -1;
+            	    ns++;
+		}
             } else return ns;
          }
        }
@@ -320,14 +693,14 @@ int SuggestMgr::twowords(char ** wlst, const char * word, int ns)
 
 
 // error is adjacent letter were swapped
-int SuggestMgr::swapchar(char ** wlst, const char * word, int ns)
+int SuggestMgr::swapchar(char ** wlst, const char * word, int ns, int cpdsuggest)
 {
-   char	candidate[MAXSWL];
+   char	candidate[MAXSWUTF8L];
    char * p;
    char	tmpc;
    int cwrd;
 
-   int wl = strlen(word);
+   int wl=strlen(word);
 
    // try swapping adjacent chars one by one
    strcpy(candidate, word);
@@ -338,7 +711,7 @@ int SuggestMgr::swapchar(char ** wlst, const char * word, int ns)
       cwrd = 1;
       for (int k=0; k < ns; k++)
 	if (strcmp(candidate,wlst[k]) == 0) cwrd = 0;
-      if ((cwrd) && check(candidate,wl)) {
+      if ((cwrd) && check(candidate,wl, cpdsuggest, NULL, NULL)) {
 	 if (ns < maxSug) {
              wlst[ns] = mystrdup(candidate);
              if (wlst[ns] == NULL) return -1;
@@ -352,9 +725,41 @@ int SuggestMgr::swapchar(char ** wlst, const char * word, int ns)
    return ns;
 }
 
+// Two neighbouring letters were typed in the wrong order: exchange each
+// adjacent pair in turn and keep the spellings that check() accepts.
+int SuggestMgr::swapchar_utf(char ** wlst, const w_char * word, int wl, int ns, int cpdsuggest)
+{
+   w_char swapped[MAXSWL];        // copy of word with one adjacent pair exchanged
+   char   swapped8[MAXSWUTF8L];   // UTF-8 conversion of `swapped`
+
+   memcpy(swapped, word, wl * sizeof(w_char));
+   for (int i = 0; i + 1 < wl; i++) {
+      // exchange the pair (i, i+1)
+      swapped[i] = word[i + 1];
+      swapped[i + 1] = word[i];
+      u16_u8(swapped8, MAXSWUTF8L, swapped, wl);
+
+      int unseen = 1;
+      for (int k = 0; k < ns; k++) {
+         if (strcmp(swapped8, wlst[k]) == 0) unseen = 0;
+      }
+      if (unseen && check(swapped8, strlen(swapped8), cpdsuggest, NULL, NULL)) {
+         if (ns >= maxSug) return ns;
+         wlst[ns] = mystrdup(swapped8);
+         if (wlst[ns] == NULL) return -1;
+         ns++;
+      }
+
+      // put the pair back before moving on to the next one
+      swapped[i] = word[i];
+      swapped[i + 1] = word[i + 1];
+   }
+
+   return ns;
+}
 
 // generate a set of suggestions for very poorly spelled words
-int SuggestMgr::ngsuggest(char** wlst, char * word, HashMgr* pHMgr)
+int SuggestMgr::ngsuggest(char** wlst, char * w, HashMgr* pHMgr)
 {
 
   int i, j;
@@ -374,14 +779,32 @@ int SuggestMgr::ngsuggest(char** wlst, char * word, HashMgr* pHMgr)
   }
   lp = MAX_ROOTS - 1;
 
-  int n = strlen(word);
+  char w2[MAXWORDUTF8LEN];
+  char * word = w;
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    strcpy(w2, w);
+    if (utf8) reverseword_utf(w2); else reverseword(w2);
+    word = w2;
+  }
+
+  char mw[MAXSWUTF8L];
+  w_char u8[MAXSWL];
+  int nc = strlen(word);
+  int n = (utf8) ? u8_u16(u8, MAXSWL, word) : nc;
 
   struct hentry* hp = NULL;
   int col = -1;
   while ((hp = pHMgr->walk_hashtable(col, hp))) {
+    // check forbidden words
+    if ((hp->astr) && (pAMgr) && 
+       (TESTAFF(hp->astr, pAMgr->get_forbiddenword(), hp->alen) ||
+          TESTAFF(hp->astr, pAMgr->get_nosuggest(), hp->alen) ||
+          TESTAFF(hp->astr, pAMgr->get_onlyincompound(), hp->alen))) continue;
     sc = ngram(3, word, hp->word, NGRAM_LONGER_WORSE);
     if (sc > scores[lp]) {
-      scores[lp] = sc;
+      scores[lp] = sc;  
       roots[lp] = hp;
       int lval = sc;
       for (j=0; j < MAX_ROOTS; j++)
@@ -396,14 +819,17 @@ int SuggestMgr::ngsuggest(char** wlst, char * word, HashMgr* pHMgr)
   // mangle original word three differnt ways
   // and score them to generate a minimum acceptable score
   int thresh = 0;
-  char * mw = NULL;
   for (int sp = 1; sp < 4; sp++) {
-     mw = mystrdup(word);
-     for (int k=sp; k < n; k+=4) *(mw + k) = '*';
-     thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH);
-     free(mw);
+     if (utf8) {
+       for (int k=sp; k < n; k+=4) *((unsigned short *) u8 + k) = '*';
+       u16_u8(mw, MAXSWUTF8L, u8, n);
+       thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH);
+     } else {
+       strcpy(mw, word);
+       for (int k=sp; k < n; k+=4) *(mw + k) = '*';
+       thresh = thresh + ngram(n, word, mw, NGRAM_ANY_MISMATCH);
+     }
   }
-  mw = NULL;
   thresh = thresh / 3;
   thresh--;
 
@@ -428,99 +854,722 @@ int SuggestMgr::ngsuggest(char** wlst, char * word, HashMgr* pHMgr)
       if (roots[i]) {
         struct hentry * rp = roots[i];
 	int nw = pAMgr->expand_rootword(glst, MAX_WORDS, rp->word, rp->wlen,
-                                        rp->astr, rp->alen);
-        for (int k = 0; k < nw; k++) {
+                                        rp->astr, rp->alen, word, nc);
+
+        for (int k = 0; k < nw ; k++) {
            sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH);
-	   if (sc > thresh)
-	   {
-		if (sc > gscore[lp])
-		{
-			if (guess[lp]) free(guess[lp]);
-			gscore[lp] = sc;
-			guess[lp] = glst[k].word;
-			glst[k].word = NULL;
-			lval = sc;
-			for (j=0; j < MAX_GUESS; j++)
-			{
-				if (gscore[j] < lval)
-				{
-					lp = j;
-					lval = gscore[j];
-				}
-			}
-		}
-	   }
-	   free (glst[k].word);
-	   glst[k].word = NULL;
-	   glst[k].allow = 0;
+           if ((sc > thresh)) {
+              if (sc > gscore[lp]) {
+	         if (guess[lp]) free (guess[lp]);
+                 gscore[lp] = sc;
+                 guess[lp] = glst[k].word;
+                 lval = sc;
+                 for (j=0; j < MAX_GUESS; j++)
+	            if (gscore[j] < lval) {
+	               lp = j;
+                       lval = gscore[j];
+	            }
+	      } else free (glst[k].word);  
+	   } else free(glst[k].word);
 	}
       }
   }
-  if (glst) free(glst);
+  free(glst);
 
   // now we are done generating guesses
-  // sort in order of decreasing score and copy over
+  // sort in order of decreasing score
   
   bubblesort(&guess[0], &gscore[0], MAX_GUESS);
+
+  // weight suggestions with a similarity index, based on
+  // the longest common subsequence algorithm, and re-sort
+
+  int is_swap;
+  for (i=0; i < MAX_GUESS; i++) {
+      if (guess[i]) {
+        // lowering guess[i]
+        char gl[MAXSWUTF8L];
+        int len;
+        if (utf8) {
+          w_char w[MAXSWL];
+          len = u8_u16(w, MAXSWL, guess[i]);
+          mkallsmall_utf(w, len, utfconv);
+          u16_u8(gl, MAXSWUTF8L, w, len);
+        } else {
+          strcpy(gl, guess[i]);
+          mkallsmall(gl, csconv);
+          len = strlen(guess[i]);
+        }
+
+        int lcs = lcslen(word, gl);
+
+        // same characters with different casing
+        if ((n == len) && (n == lcs)) {
+            gscore[i] += 2000;
+            break;
+        }
+        
+        // heuristic weighting of ngram scores
+        gscore[i] +=
+          // length of longest common subsequence minus length difference
+          2 * lcs - abs((int) (n - len)) +
+          // weight equal first letter
+          equalfirstletter(word, gl) +
+          // weight equal character positions
+          ((lcs == commoncharacterpositions(word, gl, &is_swap)) ? 1: 0) +
+          // swap character (not neighboring)
+          ((is_swap) ? 1000 : 0);
+      }
+  }
+
+  bubblesort(&guess[0], &gscore[0], MAX_GUESS);
+
+  // copy over
+
   int ns = 0;
+  int same = 0;
   for (i=0; i < MAX_GUESS; i++) {
     if (guess[i]) {
-      int unique = 1;
-      for (j=i+1; j < MAX_GUESS; j++)
-	if (guess[j]) 
-	    if (!strcmp(guess[i], guess[j])) unique = 0;
-      if (unique) {
-         wlst[ns++] = guess[i];
-      } else {
-	 free(guess[i]);
-      }
+      if ((ns < maxngramsugs) && (ns < maxSug) && (!same || (gscore[i] > 1000))) {
+        int unique = 1;
+        // we have excellent suggestion(s)
+        if (gscore[i] > 1000) same = 1;
+        for (j=0; j < ns; j++)
+          // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
+          if (strstr(guess[i], wlst[j]) || 
+            // check forbidden words
+            !check(guess[i], strlen(guess[i]), 0, NULL, NULL)) unique = 0;
+        if (unique) wlst[ns++] = guess[i]; else free(guess[i]);
+      } else free(guess[i]);
     }
   }
+
   return ns;
 }
 
 
-
-
 // see if a candidate suggestion is spelled correctly
 // needs to check both root words and words with affixes
-int SuggestMgr::check(const char * word, int len)
+// returns 0 if the word is rejected or the time limit has expired, nonzero (1, 2 or 3) if accepted
+// obsolete MySpell-HU modifications:
+// return value 2 and 3 marks compounding with hyphen (-)
+// `3' marks roots without suffix
+int SuggestMgr::check(const char * word, int len, int cpdsuggest, int * timer, time_t * timelimit)
 {
   struct hentry * rv=NULL;
+  int nosuffix = 0;
+  
+  // check time limit: the clock is only read when the countdown in *timer reaches zero
+  if (timer) {
+    (*timer)--;
+    if (!(*timer) && timelimit) {
+      if (time(NULL) > *timelimit) {
+        *timelimit = 0; // presumably lets the caller detect the expiry - TODO confirm against callers
+        return 0;
+      }
+      *timer = MAXPLUSTIMER; // restart the countdown until the next clock read
+    }
+  }
+  
   if (pAMgr) { 
+    if (cpdsuggest==1) { // compound mode: only compound_check decides, the plain lookup below is skipped
+      if (pAMgr->get_compound()) {
+	rv = pAMgr->compound_check(word,len,0,0,0,0,NULL,0,NULL,NULL,1);
+	if (rv) return 3; // XXX obsolete categorisation
+	}
+        return 0;
+    }
+
     rv = pAMgr->lookup(word);
-    if (rv == NULL) rv = pAMgr->affix_check(word,len);
+
+    if (rv) {
+        if ((rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)
+               || TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen))) return 0;
+        if (rv->astr && (TESTAFF(rv->astr,pAMgr->get_pseudoroot(),rv->alen) ||
+            TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) rv = NULL;
+    } else rv = pAMgr->prefix_check(word, len, 0); // only prefix, and prefix + suffix XXX
+    
+    if (rv) {
+	nosuffix=1; // accepted by the dictionary lookup or by prefix_check, no suffix stripping was needed
+    } else {
+	rv = pAMgr->suffix_check(word, len, 0, NULL, NULL, 0, NULL); // only suffix
+    }
+
+    if (!rv && pAMgr->have_contclass()) {
+        rv = pAMgr->suffix_check_twosfx(word, len, 0, NULL, FLAG_NULL);
+        if (!rv) rv = pAMgr->prefix_check_twosfx(word, len, 1, FLAG_NULL);
+    }
+
+    // check forbidden words (also rejects nosuggest and onlyincompound entries found via affix analysis)
+    if ((rv) && (rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)
+      || TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen) ||
+      TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) return 0;
+
+    if (rv) { // XXX obsolete
+      if ((pAMgr->get_compoundflag()) && 
+          TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen)) return 2 + nosuffix; 
+      return 1;
+    }
   }
-  if (rv) return 1;
   return 0;
 }
 
+int SuggestMgr::check_forbidden(const char * word, int len) // returns 1 if the entry found for word carries the forbidden-word flag, else 0
+{
+  struct hentry * rv = NULL;
+
+  if (pAMgr) { 
+    rv = pAMgr->lookup(word);
+    if (rv && rv->astr && (TESTAFF(rv->astr,pAMgr->get_pseudoroot(),rv->alen) ||
+        TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) rv = NULL; // such entries do not count as a direct hit
+    if (!(pAMgr->prefix_check(word,len,1))) // NOTE(review): when no prefix analysis exists, the lookup result above is overwritten
+        rv = pAMgr->suffix_check(word,len, 0, NULL, NULL, 0, NULL); // prefix+suffix, suffix
+    // check forbidden words
+    if ((rv) && (rv->astr) && TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)) return 1;
+   }
+    return 0;
+}
+
+// suggest stems, XXX experimental code; fills *slst (allocated here when NULL) and returns the new suggestion count, or -1 on allocation failure
+int SuggestMgr::suggest_stems(char*** slst, const char * w, int nsug)
+{
+    char buf[MAXSWUTF8L];
+    char ** wlst;
+    int prevnsug = nsug;
+
+    char w2[MAXWORDUTF8LEN];
+    const char * word = w;
+
+    // word reversing wrapper for complex prefixes
+    if (complexprefixes) {
+        strcpy(w2, w);
+        if (utf8) reverseword_utf(w2); else reverseword(w2);
+        word = w2;
+    }
+
+    if (*slst) {
+        wlst = *slst;
+    } else {
+        wlst = (char **) calloc(maxSug, sizeof(char *));
+        if (wlst == NULL) return -1;
+    }
+    // perhaps there is a fix stem in the dictionary
+    if ((nsug < maxSug) && (nsug > -1)) {
+        // first try the whole word; only when that adds nothing, split at the last hyphen
+        nsug = fixstems(wlst, word, nsug);
+        if (nsug == prevnsug) {
+            char * s = mystrdup(word);
+            char * p = s + strlen(s);
+            while ((*p != '-') && (p != s)) p--;
+            if (*p == '-') {
+                *p = '\0';
+                nsug = fixstems(wlst, s, nsug);
+                if ((nsug == prevnsug) && (nsug < maxSug) && (nsug >= 0)) {
+                    char * t;
+                    buf[0] = '\0';
+                    for (t = s; (t[0] != '\0') && ((t[0] >= '0') && (t[0] <= '9')); t++); // is a number? (was `||', which is always true)
+                    if (*t != '\0') strcpy(buf, "# "); // part before the hyphen is not purely numeric: mark it with "# "
+                    strcat(buf, s);
+                    wlst[nsug] = mystrdup(buf);
+                    if (wlst[nsug] == NULL) { free(s); return -1; } // do not leak the working copy on failure
+                    nsug++;
+                }
+                p++;
+                if (nsug >= 0) nsug = fixstems(wlst, p, nsug); // never pass a negative count: fixstems would write wlst[-1]
+            }
+
+            free(s);
+        }
+    }
+
+    if (nsug < 0) {
+        for (int i=0;i<maxSug; i++)
+            if (wlst[i] != NULL) free(wlst[i]);
+        free(wlst); // runs once, after the loop (the original indentation suggested otherwise)
+        return -1;
+    }
+
+    *slst = wlst;
+    return nsug;
+}
+
+
+// there are fix stems in dictionary: appends stem suggestions for word to wlst from index ns on; returns the new count, or -1 if mystrdup fails
+int SuggestMgr::fixstems(char ** wlst, const char * word, int ns)
+{
+    char fix[MAXSWUTF8L];
+    char buf[MAXSWUTF8L];
+    char prefix[MAXSWUTF8L] = "";
+
+    char * p; // NOTE(review): never assigned; only read inside the disabled `if (0)' branch below
+    int dicstem = 1; // 0 = lookup, 1= affix, 2 = compound
+    int cpdindex = 0;
+    struct hentry * rv = NULL;
+    struct hentry * rv2 = NULL;
+
+    int wl = strlen(word);
+    int cmpdstemnum;
+    int cmpdstem[MAXCOMPOUND];
+
+    if (pAMgr) { 
+	rv = pAMgr->lookup(word);
+	if (rv) {
+	    dicstem = 0;
+	} else {
+	    // try stripping off affixes 
+	    rv = pAMgr->affix_check(word, wl);
+
+	    // else try check compound word
+	    if (!rv && pAMgr->get_compound()) {
+        	rv = pAMgr->compound_check(word, wl,
+		     0, 0, 100, 0, NULL, 0, &cmpdstemnum, cmpdstem,1);
+
+		if (rv) {
+		    dicstem = 2;
+		    for (int j = 0; j < cmpdstemnum; j++) {
+			cpdindex += cmpdstem[j]; // cpdindex ends up as the sum of the reported cmpdstem values
+		    }
+		    if(! (pAMgr->lookup(word + cpdindex)))
+		    	pAMgr->affix_check(word + cpdindex, wl - cpdindex); // for prefix
+		}
+	    }
+
+
+	    if (pAMgr->get_prefix()) {
+	    	strcpy(prefix, pAMgr->get_prefix());
+	    }
+
+	    // XXX obsolete, will be a general solution for stemming
+	    if ((prefix) && (strncmp(prefix, "leg", 3)==0)) prefix[0] = '\0'; // (HU); NOTE(review): prefix is an array, so (prefix) is always true
+	}
+
+    }
+
+
+
+    if ((rv) && (ns < maxSug)) {
+    
+	// check fixstem flag and not_valid_stem flag
+	// first word
+	if ((ns < maxSug) && (dicstem < 2)) { 
+	    strcpy(buf, prefix);
+	    if ((dicstem > 0) && pAMgr->get_derived()) {
+		// XXX obsolete
+	           if (strlen(prefix) == 1) {
+			strcat(buf, (pAMgr->get_derived()) + 1);
+		   } else {
+			strcat(buf, pAMgr->get_derived());
+		   }
+		} else {
+			// special stem in affix description
+			const char * wordchars = pAMgr->get_wordchars();
+			if (rv->description && 
+			   (strchr(wordchars, *(rv->description)))) {
+			   char * desc = (rv->description) + 1;
+			   while (strchr(wordchars, *desc)) desc++;
+			   strncat(buf, rv->description, desc - (rv->description)); // copy only the leading run of word characters
+			} else {
+			    strcat(buf, rv->word);
+			}
+		}
+	    wlst[ns] = mystrdup(buf);
+	    if (wlst[ns] == NULL) return -1;
+	    ns++;
+	}
+
+	if (dicstem == 2) {
+
+	    // compound stem
+
+//	    if (rv->astr && (strchr(rv->astr, '0') == NULL)) {
+	    if (rv->astr) {
+		strcpy(buf, word);
+		buf[cpdindex] = '\0'; // keep the first cpdindex bytes of the word, then append the stem of the last part
+		if (prefix) strcat(buf, prefix);
+	        if (pAMgr->get_derived()) {
+			strcat(buf, pAMgr->get_derived());
+		} else {
+			// special stem in affix description
+			const char * wordchars = pAMgr->get_wordchars();
+			if (rv->description && 
+			   (strchr(wordchars, *(rv->description)))) {
+			   char * desc = (rv->description) + 1;
+			   while (strchr(wordchars, *desc)) desc++;
+			   strncat(buf, rv->description, desc - (rv->description));
+			} else {
+			    strcat(buf, rv->word);
+			}
+		}
+		if (ns < maxSug) {
+		    wlst[ns] = mystrdup(buf);
+		    if (wlst[ns] == NULL) return -1;
+		    ns++;
+		}
+	    }
+	}
+    }
+while (rv) {
+    if (0) { // obsolete: this whole branch is compiled out, so the loop returns ns on its first pass
+	if ((p[1] > '0') && (p[1] <= '9')) {
+	    if ((ns < maxSug) && (dicstem != 2)) {
+		int split = p[1] - '0';
+		if (rv->wlen <= split) break;
+				
+		strcpy(fix, rv->word);
+
+		// checking verbs ending with `ik'
+		
+		fix[rv->wlen - split] = 'i';
+		fix[rv->wlen - split + 1] = 'k';
+		fix[rv->wlen - split + 2] = '\0';
+
+		if (! (rv2 = pAMgr->lookup(fix))) {
+		    fix[strlen(fix) - 2] = '\0';
+		    rv2 = pAMgr->lookup(fix);
+		    if ((!rv2)) {
+			*fix = csconv[((unsigned char) *fix)].cupper;
+			rv2 = pAMgr->lookup(fix);
+			if (! rv2) return ns;
+		    }
+
+		}
+
+		if (0) {
+		    strcpy(buf, prefix);
+		    strcat(buf, fix);
+		    wlst[ns] = mystrdup(buf);
+            	    if (wlst[ns] == NULL) return -1;
+            	    ns++;
+		}
+		
+		rv = rv2;
+		
+            } else return ns; 
+	} else {
+	    strcpy(fix, "__");
+	    strcat(fix, rv->word);
+	    rv = NULL;
+	    rv2 = pAMgr->lookup(fix);
+	    if ((rv2) && (rv2->astr) && (ns < maxSug)) 
+	    if ((rv2) && (rv2->astr) && (ns < maxSug)) 
+	      if (0) {
+		char buf2[MAXSWUTF8L];
+
+		strcpy(buf2, prefix);
+                
+                if (*(rv2->astr) == '-') {
+                    strcat(buf2, "");
+                } else {
+                    strcat(buf2, "");
+                }
+
+		if (dicstem != 2) {
+            	    wlst[ns] = mystrdup(buf2);
+            	    if (wlst[ns] == NULL) return -1;
+            	    ns++;
+		}
+		
+		if ((dicstem == 2) && (ns < maxSug)) {
+		    strcpy(buf, word);
+		    buf[cpdindex] = '\0';
+		    strcat(buf + cpdindex, buf2);
+
+		    if (pAMgr->get_compound() &&
+        		(pAMgr->compound_check(buf, strlen(buf),
+	                          0,0,100,0,NULL,0,NULL,NULL,1))) {
+            		    wlst[ns] = mystrdup(buf);
+            		    if (wlst[ns] == NULL) return -1;
+			    ns++;
+		    }
+		}
+	    // many stems
+	    } else {
+		char * str = mystrdup("");
+		char * pos = str;
+		char * pos2;
+		do {
+		    int suggest = 1;
+		    pos2 = strchr(pos, '|');
+		    if (pos2) *pos2 = '\0';
+		    // ignore `-xxx' suggestion, when exists prefix
+		    if (*pos == '-') {
+			pos++;
+			if (*prefix != '\0') suggest = 0;
+		    }
+		    // ignore `xxx-' suggestion, when word is not root
+		    if ((strlen(pos) > 0) && (pos[strlen(pos)-1] == '-')) {
+			pos[strlen(pos)-1] = '\0';
+			strcpy(buf, prefix);
+			strcat(buf, fix + 2);
+			if ((dicstem != 0) && (strcmp(buf, word) != 0)) suggest = 0;
+		    }
+		    if ((suggest) && (ns < maxSug) && (strlen(pos) > 0)) {
+			strcpy(buf, prefix);
+			strcat(buf, pos);
+            		wlst[ns] = mystrdup(buf);
+            		if (wlst[ns] == NULL) return -1;
+            		ns++;
+		    }
+		    if (pos2) pos = pos2 + 1;
+		} while (pos2);
+		free(str);
+	    }
+	}
+    } else return ns;
+
+}
+
+return ns;
+
+}
+
+// suggest possible stems: lets suffix_check() fill the list, then strips one trailing dash per entry; returns the suggestion count
+int SuggestMgr::suggest_pos_stems(char*** slst, const char * w, int nsug)
+{
+    char ** wlst;    
+
+    struct hentry * rv = NULL; // NOTE(review): assigned below but never read afterwards
+
+  char w2[MAXSWUTF8L];
+  const char * word = w;
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    strcpy(w2, w);
+    if (utf8) reverseword_utf(w2); else reverseword(w2);
+    word = w2;
+  }
+
+    int wl = strlen(word);
+
+
+    if (*slst) {
+	wlst = *slst;
+    } else {
+	wlst = (char **) calloc(maxSug, sizeof(char *));
+	if (wlst == NULL) return -1;
+    }
+
+    rv = pAMgr->suffix_check(word, wl, 0, NULL, wlst, maxSug, &nsug); // NOTE(review): pAMgr is dereferenced without the NULL check other methods do
+
+    // delete dash from end of word
+    if (nsug > 0) {
+        for (int j=0; j < nsug; j++) {
+            if (wlst[j][strlen(wlst[j]) - 1] == '-') wlst[j][strlen(wlst[j]) - 1] = '\0';
+        }
+    }
+
+    *slst = wlst;
+    return nsug;
+}
+
+
+char * SuggestMgr::suggest_morph(const char * w) // returns a mystrdup'ed morphological description of w, or NULL when there is none
+{
+    char result[MAXLNLEN];
+    char * r = (char *) result; // alias of result, handed to compound_check_morph() below
+    char * st;
+
+    struct hentry * rv = NULL;
+
+    *result = '\0';
+
+    if (! pAMgr) return NULL;
+
+  char w2[MAXSWUTF8L];
+  const char * word = w;
+
+  // word reversing wrapper for complex prefixes
+  if (complexprefixes) {
+    strcpy(w2, w);
+    if (utf8) reverseword_utf(w2); else reverseword(w2);
+    word = w2;
+  }
+
+    rv = pAMgr->lookup(word);
+    
+    while (rv) { // one output line per homonym that is not forbidden, pseudoroot or only-in-compound
+        if ((!rv->astr) || !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
+            TESTAFF(rv->astr, pAMgr->get_pseudoroot(), rv->alen) ||
+            TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) {
+            if (rv->description && ((!rv->astr) || 
+                !TESTAFF(rv->astr, pAMgr->get_lemma_present(), rv->alen)))
+                    strcat(result, word); // the word itself is prepended unless the lemma_present flag is set
+            if (rv->description) strcat(result, rv->description);
+            strcat(result, "\n");
+        }
+        rv = rv->next_homonym;
+    }
+    
+    st = pAMgr->affix_check_morph(word,strlen(word));
+    if (st) {
+        strcat(result, st); // NOTE(review): all strcat calls here append to result[MAXLNLEN] without a length check
+        free(st);
+    }
+
+    if (pAMgr->get_compound() && (*result == '\0')) // compound analysis only when nothing was found so far
+        pAMgr->compound_check_morph(word, strlen(word),
+		     0, 0, 100, 0,NULL, 0, &r, NULL);
+    
+    return (*result) ? mystrdup(line_uniq(delete_zeros(result))) : NULL;
+}
+
+char * SuggestMgr::suggest_morph_for_spelling_error(const char * word) // suggest_morph() of the first suggestion for word, or NULL
+{
+    char * p = NULL;
+    char ** wlst = (char **) calloc(maxSug, sizeof(char *));
+    if (wlst == NULL) return NULL; // out of memory: the original went on to write through the NULL pointer
+    for (int i = 0; i < maxSug - 1; i++) wlst[i] = ""; // we will use only the first suggestion: placeholders leave one free slot
+    int ns = suggest(&wlst, word, maxSug - 1);
+    if (ns == maxSug) { // exactly one suggestion was added, in the last slot
+        p = suggest_morph(wlst[maxSug - 1]);
+        free(wlst[maxSug - 1]);
+    }
+    if (wlst) free(wlst); // placeholders are string literals and are not freed here
+    return p;
+}
 
 
 // generate an n-gram score comparing s1 and s2
 int SuggestMgr::ngram(int n, char * s1, const char * s2, int uselen)
 {
   int nscore = 0;
-  int l1 = strlen(s1);
-  int l2 = strlen(s2);
   int ns;
-  for (int j=1;j<=n;j++) {
-    ns = 0;
-    for (int i=0;i<=(l1-j);i++) {
-      char c = *(s1 + i + j);
-      *(s1 + i + j) = '\0';
-      if (strstr(s2,(s1+i))) ns++;
-      *(s1 + i + j ) = c;
-    }
-    nscore = nscore + ns;
-    if (ns < 2) break;
+  int l1;
+  int l2;
+  // UTF-8 words are compared as w_char arrays; other encodings are compared bytewise with strstr
+  if (utf8) {
+    w_char su1[MAXSWL];
+    w_char su2[MAXSWL];
+    l1 = u8_u16(su1, MAXSWL, s1);
+    l2 = u8_u16(su2, MAXSWL, s2);
+    if (!l2) return 0;
+    // decapitalize dictionary word (one letter only: the last one when complexprefixes is set, else the first)
+    if (complexprefixes) {
+      mkallsmall_utf(su2+l2-1, 1, utfconv);
+    } else {
+      mkallsmall_utf(su2, 1, utfconv);
+    }
+    for (int j = 1; j <= n; j++) { // j = length of the substrings compared in this round
+      ns = 0;
+      for (int i = 0; i <= (l1-j); i++) {
+        for (int l = 0; l <= (l2-j); l++) {
+            int k;
+            for (k = 0; (k < j); k++) {
+              w_char * c1 = su1 + i + k;
+              w_char * c2 = su2 + l + k;
+              if ((c1->l != c2->l) || (c1->h != c2->h)) break;
+            }
+            if (k == j) { // the j-gram of s1 starting at i occurs in s2: count it once
+                ns++;
+                break;
+            }
+        }
+      }
+      nscore = nscore + ns;
+      if (ns < 2) break; // fewer than two hits at this length: longer substrings are not tried
+    }
+  } else {  
+    char t[MAXSWUTF8L];
+    l1 = strlen(s1);
+    l2 = strlen(s2);
+    if (!l2) return 0;
+    strcpy(t, s2); // NOTE(review): unchecked copy into a MAXSWUTF8L-byte buffer
+    if (complexprefixes) {
+      *(t+l2-1) = csconv[((unsigned char)*(t+l2-1))].clower;
+    } else {
+    mkallsmall(t, csconv); // NOTE(review): lowercases the whole copy, unlike the one-letter UTF-8 branch above
+///      *t = csconv[((unsigned char)*t)].clower;
+    }
+    for (int j = 1; j <= n; j++) {
+      ns = 0;
+      for (int i = 0; i <= (l1-j); i++) {
+        char c = *(s1 + i + j); // s1 is cut in place after j bytes for strstr and restored below
+        *(s1 + i + j) = '\0';
+        if (strstr(t,(s1+i))) ns++;
+        *(s1 + i + j ) = c;
+      }
+      nscore = nscore + ns;
+      if (ns < 2) break;
+    }
   }
+
   ns = 0;
   if (uselen == NGRAM_LONGER_WORSE) ns = (l2-l1)-2;
   if (uselen == NGRAM_ANY_MISMATCH) ns = abs(l2-l1)-2;
   return (nscore - ((ns > 0) ? ns : 0));
 }
 
+int SuggestMgr::equalfirstletter(char * s1, const char * s2) { // returns 1 if both words start with the same letter, else 0
+  if (utf8) {
+    w_char su1[MAXSWL];
+    w_char su2[MAXSWL];
+    // with complexprefixes ngsuggest passes the reversed word, so the leading letter is the last element
+    if (complexprefixes) {
+      int l1 = u8_u16(su1, MAXSWL, s1);
+      int l2 = u8_u16(su2, MAXSWL, s2);
+      if ((l1 > 0) && (l2 > 0) && (*((short *)su1+l1-1) == *((short *)su2+l2-1))) return 1; // guards keep len-1 in range
+    } else {
+      u8_u16(su1, 1, s1); // only the first character is converted and compared
+      u8_u16(su2, 1, s2);
+      if (*((short *)su1) == *((short *)su2)) return 1;
+    }
+  } else {
+    if (complexprefixes) {
+      int l1 = strlen(s1);
+      int l2 = strlen(s2);
+      if ((l1 > 0) && (l2 > 0) && (*(s1+l1-1) == *(s2+l2-1))) return 1; // last byte of s1 vs. last byte of s2 (the original read s2 twice)
+    } else {
+      if (*s1 == *s2) return 1;
+    }
+  }
+  return 0;
+}
+
+int SuggestMgr::commoncharacterpositions(char * s1, const char * s2, int * is_swap) { // returns how many positions hold the same character in both words
+  int num = 0;
+  int diff = 0; // number of differing positions within the common length
+  int diffpos[2]; // the first two differing positions, used for the swap test
+  *is_swap = 0; // set to 1 when the words have equal length and differ only by two exchanged characters
+  if (utf8) {
+    w_char su1[MAXSWL];
+    w_char su2[MAXSWL];
+    int l1 = u8_u16(su1, MAXSWL, s1);
+    int l2 = u8_u16(su2, MAXSWL, s2);
+    for (int i = 0; (i < l1) && (i < l2); i++) {
+      if (((short *) su1)[i] == ((short *) su2)[i]) {
+        num++;
+      } else {
+        if (diff < 2) diffpos[diff] = i;
+        diff++;
+      }
+    }
+    if ((diff == 2) && (l1 == l2) &&
+        (((short *) su1)[diffpos[0]] == ((short *) su2)[diffpos[1]]) &&
+        (((short *) su1)[diffpos[1]] == ((short *) su2)[diffpos[0]])) *is_swap = 1;
+  } else {
+    int i;
+    for (i = 0; (*(s1+i) != 0) && (*(s2+i) != 0); i++) {
+      if (*(s1+i) == *(s2+i)) {
+        num++;
+      } else {
+        if (diff < 2) diffpos[diff] = i;
+        diff++;
+      }
+    }
+    if ((diff == 2) && (*(s1+i) == 0) && (*(s2+i) == 0) && // both strings ended together, i.e. equal length
+      (*(s1+diffpos[0]) == *(s2+diffpos[1])) &&
+      (*(s1+diffpos[1]) == *(s2+diffpos[0]))) *is_swap = 1;
+  }
+  return num;
+}
+
+int SuggestMgr::mystrlen(const char * word) { // length of word: the u8_u16() result in UTF-8 mode, strlen() otherwise
+  if (utf8) {
+    w_char w[MAXSWL]; // scratch buffer, only the count returned by u8_u16 is used
+    return u8_u16(w, MAXSWL, word);
+  } else return strlen(word);
+}
 
 // sort in decreasing order of score
 void SuggestMgr::bubblesort(char** rword, int* rsc, int n )
@@ -544,3 +1593,66 @@ void SuggestMgr::bubblesort(char** rword, int* rsc, int n )
       return;
 }
 
+// longest common subsequence: builds the (m+1) x (n+1) direction table for s and s2; *result is malloc'ed (caller frees) or NULL when out of memory
+void SuggestMgr::lcs(const char * s, const char * s2, int * l1, int * l2, char ** result) {
+  int n, m;
+  w_char su[MAXSWL];
+  w_char su2[MAXSWL];
+  char * b; // direction table (LCS_UP / LCS_LEFT / LCS_UPLEFT), handed back through *result
+  char * c; // subsequence-length table, only needed while b is being filled
+  int i, j;
+  if (utf8) {
+    m = u8_u16(su, MAXSWL, s);
+    n = u8_u16(su2, MAXSWL, s2);
+  } else {
+    m = strlen(s);
+    n = strlen(s2);
+  }
+  *l1 = m;
+  *l2 = n;
+  c = (char *) malloc((m + 1) * (n + 1));
+  b = (char *) malloc((m + 1) * (n + 1));
+  if (!c || !b) { free(c); free(b); *result = NULL; return; } // allocation failed: report "no table" instead of crashing
+  for (i = 1; i <= m; i++) c[i*(n+1)] = 0;
+  for (j = 0; j <= n; j++) c[j] = 0;
+  for (i = 1; i <= m; i++) {
+    for (j = 1; j <= n; j++) {
+      if ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1))
+          || (!utf8) && ((*(s+i-1)) == (*(s2+j-1)))) {
+        c[i*(n+1) + j] = c[(i-1)*(n+1) + j-1]+1;
+        b[i*(n+1) + j] = LCS_UPLEFT;
+      } else if (c[(i-1)*(n+1) + j] >= c[i*(n+1) + j-1]) {
+        c[i*(n+1) + j] = c[(i-1)*(n+1) + j];
+        b[i*(n+1) + j] = LCS_UP;
+      } else {
+        c[i*(n+1) + j] = c[i*(n+1) + j-1];
+        b[i*(n+1) + j] = LCS_LEFT;
+      }
+    }
+  }
+  *result = b;
+  free(c);
+}
+
+// length of the longest common subsequence of s and s2 (0 when the lcs() table could not be allocated)
+int SuggestMgr::lcslen(const char * s, const char* s2) {
+  int m, n;
+  int i, j;
+  char * result;
+  int len = 0;
+  lcs(s, s2, &m, &n, &result);
+  if (!result) return 0; // lcs() ran out of memory
+  i = m;
+  j = n;
+  while ((i != 0) && (j != 0)) { // walk back from the bottom-right cell, counting the diagonal steps
+    if (result[i*(n+1) + j] == LCS_UPLEFT) {
+      len++;
+      i--;
+      j--;
+    } else if (result[i*(n+1) + j] == LCS_UP) {
+      i--;
+    } else j--;
+  }
+  free(result);
+  return len;
+}
diff --git a/src/myspell/suggestmgr.hxx b/src/myspell/suggestmgr.hxx
index 7c5a6e2..5bc64bb 100644
--- a/src/myspell/suggestmgr.hxx
+++ b/src/myspell/suggestmgr.hxx
@@ -2,46 +2,85 @@
 #define _SUGGESTMGR_HXX_
 
 #define MAXSWL 100
-#define MAX_ROOTS 10
-#define MAX_WORDS 500
-#define MAX_GUESS 10
+#define MAXSWUTF8L (MAXSWL * 4) // size of the char buffers that pair with MAXSWL-element w_char buffers
+#define MAX_ROOTS 50 // number of best-scoring root words kept by ngsuggest
+#define MAX_WORDS 200 // capacity passed to expand_rootword() in ngsuggest
+#define MAX_GUESS 200 // number of guess slots kept by ngsuggest
+#define MAXNGRAMSUGS 5 // presumably the default for maxngramsugs - TODO confirm in the constructor
+
+#define MINTIMER 500 // NOTE(review): not used in the code visible here - confirm its role against suggest()
+#define MAXPLUSTIMER 500 // countdown value check() reloads after each clock read
 
 #define NGRAM_IGNORE_LENGTH 0
 #define NGRAM_LONGER_WORSE  1
 #define NGRAM_ANY_MISMATCH  2
 
-
 #include "atypes.hxx"
 #include "affixmgr.hxx"
 #include "hashmgr.hxx"
+#include "langnum.hxx"
+#include <time.h> // time_t in the check() declaration
+
+enum { LCS_UP, LCS_LEFT, LCS_UPLEFT }; // step directions stored in the table built by lcs()
 
 class SuggestMgr
 {
   char *          ctry;
   int             ctryl;
+  w_char *        ctry_utf; // presumably the w_char form of ctry - TODO confirm in the constructor
+
   AffixMgr*       pAMgr;
   int             maxSug;
-  bool            nosplitsugs;
+  struct cs_info * csconv; // 8-bit case conversion table (used by mkallsmall and ngram)
+  struct unicode_info2 * utfconv; // passed to mkallsmall_utf for UTF-8 words
+  int             utf8; // nonzero selects the UTF-8 (w_char) code paths
+  int             nosplitsugs;
+  int             maxngramsugs; // upper bound on the suggestions ngsuggest copies out
+  int             complexprefixes; // nonzero: words are reversed before they are processed
+
 
 public:
   SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
   ~SuggestMgr();
 
-  int suggest(char** wlst, int ns, const char * word);
-  int check(const char *, int);
+  int suggest(char*** slst, const char * word, int nsug);
   int ngsuggest(char ** wlst, char * word, HashMgr* pHMgr);
+  int suggest_auto(char*** slst, const char * word, int nsug);
+  int suggest_stems(char*** slst, const char * word, int nsug); // returns the new count, -1 on allocation failure
+  int suggest_pos_stems(char*** slst, const char * word, int nsug);
+
+  char * suggest_morph(const char * word); // result is mystrdup'ed or NULL
+  char * suggest_morph_for_spelling_error(const char * word);
 
 private:
-   int replchars(char**, const char *, int);
-   int mapchars(char**, const char *, int);
-   int map_related(const char *, int, char ** wlst, int, const mapentry*, int);
-   int forgotchar(char **, const char *, int);
-   int swapchar(char **, const char *, int);
-   int extrachar(char **, const char *, int);
-   int badchar(char **, const char *, int);
-   int twowords(char **, const char *, int);
+   int check(const char *, int, int, int *, time_t *); // word, len, cpdsuggest, timer, timelimit
+   int check_forbidden(const char *, int);
+
+   int replchars(char**, const char *, int, int);
+   int doubledsyllable(char**, const char *, int, int);
+   int forgotchar(char **, const char *, int, int);
+   int swapchar(char **, const char *, int, int);
+   int extrachar(char **, const char *, int, int);
+   int badchar(char **, const char *, int, int);
+   int twowords(char **, const char *, int, int);
+   int fixstems(char **, const char *, int); // list, word, current count
+
+   int forgotchar_utf(char**, const w_char *, int wl, int, int);
+   int extrachar_utf(char**, const w_char *, int wl, int, int);
+   int badchar_utf(char **, const w_char *, int wl, int, int);
+   int swapchar_utf(char **, const w_char *, int wl, int, int);
+
+   int mapchars(char**, const char *, int, int);
+   int map_related(const char *, int, char ** wlst, int, const mapentry*, int, int *, time_t *);
+   int map_related_utf(w_char *, int, int, char ** wlst, int, const mapentry*, int, int *, time_t *);
    int ngram(int n, char * s1, const char * s2, int uselen);
+   int mystrlen(const char * word); // length via u8_u16 in UTF-8 mode, strlen otherwise
+   int equalfirstletter(char * s1, const char * s2); // 1 if the leading letters match, else 0
+   int commoncharacterpositions(char * s1, const char * s2, int * is_swap); // count of equal positions; *is_swap reports a two-letter exchange
    void bubblesort( char ** rwd, int * rsc, int n);
+   void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result); // *result is malloc'ed, the caller frees it
+   int lcslen(const char * s, const char* s2); // length of the longest common subsequence
+
 };
 
 #endif
author	Dom Lachowicz <domlachowicz@gmail.com>	2006-01-14 02:18:48 +0000
committer	Dom Lachowicz <domlachowicz@gmail.com>	2006-01-14 02:18:48 +0000
commit	7f5d852c3116af74620e630a776b6a8e03f8e5c9 (patch)
tree	08856630ce7f546ecafe18d7d68cbc41f13113ef
parent	48a8a34b95d427464cc9ca8af9fbf2900f1dcf30 (diff)
download	enchant-7f5d852c3116af74620e630a776b6a8e03f8e5c9.tar.gz