summary refs log tree commit diff
diff options
context:
space:
mode:
author Dom Lachowicz <domlachowicz@gmail.com> 2008-01-06 15:02:36 +0000
committer Dom Lachowicz <domlachowicz@gmail.com> 2008-01-06 15:02:36 +0000
commit 0e6694c9b1812f3deac32122f969c50d89bf05ad (patch)
tree 341a860489bdc5d338712e5aacaf3cd18e923674
parent d7a2136697545d7ec7e6a2328d5377e32811e188 (diff)
download enchant-0e6694c9b1812f3deac32122f969c50d89bf05ad.tar.gz
upgrade to hunspell 1.2.1, released in November, 2007
git-svn-id: svn+ssh://svn.abisource.com/svnroot/enchant/trunk@22579 bcba8976-2d24-0410-9c9c-aab3bd5fdfd6
-rw-r--r-- src/myspell/README 21
-rw-r--r-- src/myspell/affentry.cxx 300
-rw-r--r-- src/myspell/affentry.hxx 3
-rw-r--r-- src/myspell/affixmgr.cxx 704
-rw-r--r-- src/myspell/affixmgr.hxx 66
-rw-r--r-- src/myspell/atypes.hxx 30
-rw-r--r-- src/myspell/baseaffix.hxx 37
-rw-r--r-- src/myspell/csutil.cxx 326
-rw-r--r-- src/myspell/csutil.hxx 58
-rw-r--r-- src/myspell/hashmgr.cxx 133
-rw-r--r-- src/myspell/hashmgr.hxx 10
-rw-r--r-- src/myspell/htypes.hxx 13
-rw-r--r-- src/myspell/hunspell.cxx 533
-rw-r--r-- src/myspell/hunspell.h 54
-rw-r--r-- src/myspell/hunspell.hxx 71
-rw-r--r-- src/myspell/license.hunspell 26
-rw-r--r-- src/myspell/makefile.mk 113
-rw-r--r-- src/myspell/suggestmgr.cxx 373
-rw-r--r-- src/myspell/suggestmgr.hxx 2
19 files changed, 1657 insertions, 1216 deletions
diff --git a/src/myspell/README b/src/myspell/README
new file mode 100644
index 0000000..b452096
--- /dev/null
+++ b/src/myspell/README
@@ -0,0 +1,21 @@
+Hunspell spell checker and morphological analyser library
+
+Documentation, tests, examples: http://hunspell.sourceforge.net
+
+Author of Hunspell:
+László Németh (nemethl (at) gyorsposta.hu)
+
+Hunspell is based on OpenOffice.org's MySpell. MySpell's author:
+Kevin Hendricks (kevin.hendricks (at) sympatico.ca)
+
+License: GPL 2.0/LGPL 2.1/MPL 1.1 tri-license
+
+The contents of this library may be used under the terms of
+the GNU General Public License Version 2 or later (the "GPL"), or
+the GNU Lesser General Public License Version 2.1 or later (the "LGPL",
+see http://gnu.org/copyleft/lesser.html) or the Mozilla Public License
+Version 1.1 or later (the "MPL", see http://mozilla.org/MPL/MPL-1.1.html).
+
+Software distributed under these licenses is distributed on an "AS IS" basis,
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licenses
+for the specific language governing rights and limitations under the licenses.
diff --git a/src/myspell/affentry.cxx b/src/myspell/affentry.cxx
index 741496b..0ffe557 100644
--- a/src/myspell/affentry.cxx
+++ b/src/myspell/affentry.cxx
@@ -35,16 +35,17 @@ PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
appndl = dp->appndl; // length of append string
- numconds = dp->numconds; // number of conditions to match
- opts = dp->opts; // cross product flag
+ numconds = dp->numconds; // length of the condition
+ opts = dp->opts; // cross product flag
// then copy over all of the conditions
- memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
+ if (opts & aeLONGCOND) {
+ memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
+ c.l.conds2 = dp->c.l.conds2;
+ } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
next = NULL;
nextne = NULL;
nexteq = NULL;
-#ifdef HUNSPELL_EXPERIMENTAL
morphcode = dp->morphcode;
-#endif
contclass = dp->contclass;
contclasslen = dp->contclasslen;
}
@@ -58,14 +59,8 @@ PfxEntry::~PfxEntry()
pmyMgr = NULL;
appnd = NULL;
strip = NULL;
- if (opts & aeUTF8) {
- for (int i = 0; i < numconds; i++) {
- if (conds.utf8.wchars[i]) free(conds.utf8.wchars[i]);
- }
- }
-#ifdef HUNSPELL_EXPERIMENTAL
+ if (opts & aeLONGCOND) free(c.l.conds2);
if (morphcode && !(opts & aeALIASM)) free(morphcode);
-#endif
if (contclass && !(opts & aeALIASF)) free(contclass);
}
@@ -89,47 +84,71 @@ char * PfxEntry::add(const char * word, int len)
return NULL;
}
+inline char * PfxEntry::nextchar(char * p) { // step p one byte forward in the stored condition; a NULL p is returned unchanged
+ if (p) {
+ p++;
+ if (opts & aeLONGCOND) {
+ // long condition: once p reaches c.conds + MAXCONDLEN_1, continue in c.l.conds2
+ if (p == c.conds + MAXCONDLEN_1) return c.l.conds2;
+ // short condition: return NULL once p reaches c.conds + MAXCONDLEN
+ } else if (p == c.conds + MAXCONDLEN) return NULL;
+ }
+ return p;
+}
inline int PfxEntry::test_condition(const char * st)
{
- int cond;
- unsigned char * cp = (unsigned char *)st;
- if (!(opts & aeUTF8)) { // 256-character codepage
- for (cond = 0; cond < numconds; cond++) {
- if ((conds.base[*cp++] & (1 << cond)) == 0) return 0;
- }
- } else { // UTF-8 encoding
- unsigned short wc;
- for (cond = 0; cond < numconds; cond++) {
- // a simple 7-bit ASCII character in UTF-8
- if ((*cp >> 7) == 0) {
- // also check limit (end of word)
- if ((!*cp) || ((conds.utf8.ascii[*cp++] & (1 << cond)) == 0)) return 0;
- // UTF-8 multibyte character
- } else {
- // not dot wildcard in rule
- if (!conds.utf8.all[cond]) {
- if (conds.utf8.neg[cond]) {
- u8_u16((w_char *) &wc, 1, (char *) cp);
- if (conds.utf8.wchars[cond] &&
- flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
- wc, (short) conds.utf8.wlen[cond])) return 0;
- } else {
- if (!conds.utf8.wchars[cond]) return 0;
- u8_u16((w_char *) &wc, 1, (char *) cp);
- if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
- wc, (short)conds.utf8.wlen[cond])) return 0;
- }
+ const char * pos = NULL; // group with pos input position
+ bool neg = false; // complementer
+ bool ingroup = false; // character in the group
+ if (numconds == 0) return 1;
+ char * p = c.conds;
+ while (1) {
+ switch (*p) {
+ case '\0': return 1;
+ case '[': { p = nextchar(p); pos = st; break; }
+ case '^': { p = nextchar(p); neg = true; break; }
+ case ']': { if ((neg && ingroup) || (!neg && !ingroup)) return 0;
+ pos = NULL;
+ neg = false;
+ ingroup = false;
+ p = nextchar(p);
+ st++;
+ if (*st == '\0' && p && *p != '\0') return 0; // word <= condition
+ break;
+ }
+ case '.': if (!pos) { // dots are not metacharacters in groups: [.]
+ p = nextchar(p);
+ // skip the next character
+ for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++);
+ if (*st == '\0') return 0; // word <= condition
+ break;
+ }
+ default: {
+ if (*st == *p) {
+ st++;
+ p = nextchar(p);
+ if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
+ while (p && (*p & 0xc0) == 0x80) { // character
+ if (*p != *st) {
+ if (!pos) return 0;
+ st = pos;
+ break;
+ }
+ p = nextchar(p);
+ st++;
+ }
+ if (st != pos) ingroup = true;
+ } else if (pos) ingroup = true;
+ } else if (pos) { // group
+ p = nextchar(p);
+ } else return 0;
}
- // jump to next UTF-8 character
- for(cp++; (*cp & 0xc0) == 0x80; cp++);
- }
}
+ if (!p) return 1;
}
- return 1;
}
-
// check if this prefix entry matches
struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound, const FLAG needflag)
{
@@ -144,14 +163,15 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+// if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if (tmpl > 0) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
if (stripl) strcpy (tmpword, strip);
strcpy ((tmpword + stripl), (word + appndl));
-
+
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
// this file for more info on exactly what is being
@@ -165,8 +185,8 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
- // forbid single prefixes with pseudoroot flag
- ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+ // forbid single prefixes with needaffix flag
+ ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
// needflag
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
(contclass && TESTAFF(contclass, needflag, contclasslen))))
@@ -205,7 +225,8 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+// if (tmpl > 0) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -237,7 +258,6 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
return NULL;
}
-#ifdef HUNSPELL_EXPERIMENTAL
// check if this prefix entry matches
char * PfxEntry::check_twosfx_morph(const char * word, int len,
char in_compound, const FLAG needflag)
@@ -252,7 +272,8 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+// if (tmpl > 0) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -302,7 +323,8 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+// if (tmpl > 0) {
// generate new root word by removing prefix and adding
// back any characters that would have been stripped
@@ -323,15 +345,25 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
if ((he = pmyMgr->lookup(tmpword)) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
- // forbid single prefixes with pseudoroot flag
- ! TESTAFF(contclass, pmyMgr->get_pseudoroot(), contclasslen) &&
+ // forbid single prefixes with needaffix flag
+ ! TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
// needflag
((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
- (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
- if (morphcode) strcat(result, morphcode); else strcat(result,getKey());
- if (he->description) {
- if ((*(he->description)=='[')||(*(he->description)=='<')) strcat(result,he->word);
- strcat(result,he->description);
+ (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
+ if (morphcode) {
+ strcat(result, " ");
+ strcat(result, morphcode);
+ } else strcat(result,getKey());
+ if (!HENTRY_FIND(he, MORPH_STEM)) {
+ strcat(result, " ");
+ strcat(result, MORPH_STEM);
+ strcat(result,HENTRY_WORD(he));
+ }
+ // store the pointer of the hash entry
+// sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, he);
+ if (HENTRY_DATA(he)) {
+ strcat(result, " ");
+ strcat(result,HENTRY_DATA(he));
}
strcat(result, "\n");
}
@@ -357,7 +389,6 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
if (*result) return mystrdup(result);
return NULL;
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
{
@@ -370,17 +401,17 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
appnd = dp->appnd; // string to append
stripl = dp->stripl; // length of strip string
appndl = dp->appndl; // length of append string
- numconds = dp->numconds; // number of conditions to match
- opts = dp->opts; // cross product flag
+ numconds = dp->numconds; // length of the condition
+ opts = dp->opts; // cross product flag
// then copy over all of the conditions
- memcpy(&conds.base[0],&dp->conds.base[0],SETSIZE*sizeof(conds.base[0]));
+ if (opts & aeLONGCOND) {
+ memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
+ c.l.conds2 = dp->c.l.conds2;
+ } else memcpy(c.conds, dp->c.conds, MAXCONDLEN);
rappnd = myrevstrdup(appnd);
-
-#ifdef HUNSPELL_EXPERIMENTAL
morphcode = dp->morphcode;
-#endif
contclass = dp->contclass;
contclasslen = dp->contclasslen;
}
@@ -395,14 +426,8 @@ SfxEntry::~SfxEntry()
pmyMgr = NULL;
appnd = NULL;
strip = NULL;
- if (opts & aeUTF8) {
- for (int i = 0; i < numconds; i++) {
- if (conds.utf8.wchars[i]) free(conds.utf8.wchars[i]);
- }
- }
-#ifdef HUNSPELL_EXPERIMENTAL
+ if (opts & aeLONGCOND) free(c.l.conds2);
if (morphcode && !(opts & aeALIASM)) free(morphcode);
-#endif
if (contclass && !(opts & aeALIASF)) free(contclass);
}
@@ -427,50 +452,92 @@ char * SfxEntry::add(const char * word, int len)
return NULL;
}
+inline char * SfxEntry::nextchar(char * p) { // step p one byte forward in the stored condition
+ p++; // no NULL check here: p is incremented unconditionally
+ if (opts & aeLONGCOND) {
+ // long condition: once p reaches c.l.conds1 + MAXCONDLEN_1, continue in c.l.conds2
+ if (p == c.l.conds1 + MAXCONDLEN_1) return c.l.conds2;
+ // short condition: return NULL once p reaches c.conds + MAXCONDLEN
+ } else if (p == c.conds + MAXCONDLEN) return NULL;
+ return p;
+}
inline int SfxEntry::test_condition(const char * st, const char * beg)
{
- int cond;
- unsigned char * cp = (unsigned char *) st;
- if (!(opts & aeUTF8)) { // 256-character codepage
- // Domolki affix algorithm
- for (cond = numconds; --cond >= 0; ) {
- if ((conds.base[*--cp] & (1 << cond)) == 0) return 0;
- }
- } else { // UTF-8 encoding
- unsigned short wc;
- for (cond = numconds; --cond >= 0; ) {
- // go to next character position and check limit
- if ((char *) --cp < beg) return 0;
- // a simple 7-bit ASCII character in UTF-8
- if ((*cp >> 7) == 0) {
- if ((conds.utf8.ascii[*cp] & (1 << cond)) == 0) return 0;
- // UTF-8 multibyte character
- } else {
- // go to first character of UTF-8 multibyte character
- for (; (*cp & 0xc0) == 0x80; cp--);
- // not dot wildcard in rule
- if (!conds.utf8.all[cond]) {
- if (conds.utf8.neg[cond]) {
- u8_u16((w_char *) &wc, 1, (char *) cp);
- if (conds.utf8.wchars[cond] &&
- flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
- wc, (short) conds.utf8.wlen[cond])) return 0;
- } else {
- if (!conds.utf8.wchars[cond]) return 0;
- u8_u16((w_char *) &wc, 1, (char *) cp);
- if (!flag_bsearch((unsigned short *)conds.utf8.wchars[cond],
- wc, (short)conds.utf8.wlen[cond])) return 0;
+ const char * pos = NULL; // group with pos input position
+ bool neg = false; // complementer
+ bool ingroup = false; // character in the group
+ if (numconds == 0) return 1;
+ char * p = c.conds;
+ st--;
+ int c = 1;
+ while (1) {
+ switch (*p) {
+ case '\0': return 1;
+ case '[': { p = nextchar(p); pos = st; break; }
+ case '^': { p = nextchar(p); neg = true; break; }
+ case ']': { if (!neg && !ingroup) return 0;
+ c++;
+ pos = NULL;
+ neg = false;
+ ingroup = false;
+ p = nextchar(p);
+ st--;
+ if (st < beg && p && *p != '\0') return 0; // word <= condition
+ break;
+ }
+ case '.': if (!pos) { // dots are not metacharacters in groups: [.]
+ p = nextchar(p);
+ // skip the next character
+ for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--);
+ if (st < beg) return 0; // word <= condition
+ if (*st & 0x80) { // head of the UTF-8 character
+ st--;
+ if (st < beg) return 0; // word <= condition
}
+ break;
+ }
+ default: {
+ if (*st == *p) {
+ p = nextchar(p);
+ if ((opts & aeUTF8) && (*st & 0x80)) {
+ st--;
+ while (p && (st >= beg)) {
+ if (*p != *st) {
+ if (!pos) return 0;
+ st = pos;
+ break;
+ }
+ // first byte of the UTF-8 multibyte character
+ if ((*p & 0xc0) != 0x80) break;
+ p = nextchar(p);
+ st--;
+ }
+ if (pos && st != pos) {
+ if (neg) return 0;
+ else if (c == numconds) return 1;
+ ingroup = true;
+ }
+ if (p && *p != '\0') p = nextchar(p);
+ } else if (pos) {
+ if (neg) return 0;
+ else if (c == numconds) return 1;
+ ingroup = true;
+ }
+ if (!pos) {
+ c++;
+ st--;
+ if (st < beg && p && *p != '\0') return 0; // word <= condition
+ }
+ } else if (pos) { // group
+ p = nextchar(p);
+ } else return 0;
}
- }
}
+ if (!p) return 1;
}
- return 1;
}
-
-
// see if this suffix is present in the word
struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
@@ -497,7 +564,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
// the second condition is not enough for UTF-8 strings
// it checked in test_condition()
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+// if (tmpl > 0) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
@@ -513,7 +581,8 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
// now make sure all of the conditions on characters
// are met. Please see the appendix at the end of
- // this file for more info on exactly what is being // tested
+ // this file for more info on exactly what is being
+ // tested
// if all conditions are met then check if resulting
// root word in the dictionary
@@ -595,7 +664,8 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+// if (tmpl > 0) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
@@ -632,7 +702,6 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
return NULL;
}
-#ifdef HUNSPELL_EXPERIMENTAL
// see if two-level suffix is present in the word
char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
AffEntry* ppfx, const FLAG needflag)
@@ -660,7 +729,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
tmpl = len - appndl;
- if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+ if ((tmpl > 0) && (tmpl + stripl >= numconds)) {
+// if (tmpl > 0) {
// generate new root word by removing suffix and adding
// back any characters that would have been stripped or
@@ -689,6 +759,7 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
if (st) {
if (((PfxEntry *) ppfx)->getMorph()) {
strcat(result, ((PfxEntry *) ppfx)->getMorph());
+ strcat(result, " ");
}
strcat(result,st);
free(st);
@@ -715,7 +786,6 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
}
return NULL;
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
// get next homonym with same affix
struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
diff --git a/src/myspell/affentry.hxx b/src/myspell/affentry.hxx
index bb21773..ef1f86d 100644
--- a/src/myspell/affentry.hxx
+++ b/src/myspell/affentry.hxx
@@ -54,6 +54,7 @@ public:
inline void setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
+ inline char * nextchar(char * p);
inline int test_condition(const char * st);
};
@@ -123,7 +124,9 @@ public:
inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
+ inline char * nextchar(char * p);
inline int test_condition(const char * st, const char * begin);
+
};
#endif
diff --git a/src/myspell/affixmgr.cxx b/src/myspell/affixmgr.cxx
index a853f82..d3e36be 100644
--- a/src/myspell/affixmgr.cxx
+++ b/src/myspell/affixmgr.cxx
@@ -56,11 +56,11 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
checkcompoundtriple = 0; // forbid compounds with triple letters
- forbiddenword = FLAG_NULL; // forbidden word signing flag
+ forbiddenword = FORBIDDENWORD; // forbidden word signing flag
nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
lang = NULL; // language
langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
- pseudoroot = FLAG_NULL; // forbidden root, allowed only with suffixes
+ needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
cpdwordmax = -1; // default: unlimited wordcount in compound words
cpdmin = -1; // undefined
cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
@@ -90,6 +90,7 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
sugswithdots = 0;
keepcase = 0;
checksharps = 0;
+ substandard = FLAG_NULL;
derived = NULL; // XXX not threadsafe variable for experimental stemming
sfx = NULL;
@@ -218,7 +219,7 @@ AffixMgr::~AffixMgr()
FREE_FLAG(compoundroot);
FREE_FLAG(forbiddenword);
FREE_FLAG(nosuggest);
- FREE_FLAG(pseudoroot);
+ FREE_FLAG(needaffix);
FREE_FLAG(lemma_present);
FREE_FLAG(circumfix);
FREE_FLAG(onlyincompound);
@@ -453,17 +454,17 @@ int AffixMgr::parse_file(const char * affpath)
}
}
- /* parse in the flag used by `pseudoroots' */
+ /* parse in the needaffix flag (PSEUDOROOT keyword) */
if (strncmp(line,"PSEUDOROOT",10) == 0) {
- if (parse_flag(line, &pseudoroot, "PSEUDOROOT")) {
+ if (parse_flag(line, &needaffix, "PSEUDOROOT")) {
fclose(afflst);
return 1;
}
}
- /* parse in the flag used by `pseudoroots' */
+ /* parse in the needaffix flag (NEEDAFFIX keyword) */
if (strncmp(line,"NEEDAFFIX",9) == 0) {
- if (parse_flag(line, &pseudoroot, "NEEDAFFIX")) {
+ if (parse_flag(line, &needaffix, "NEEDAFFIX")) {
fclose(afflst);
return 1;
}
@@ -602,6 +603,14 @@ int AffixMgr::parse_file(const char * affpath)
}
}
+ /* parse in the flag used by the affix generator */
+ if (strncmp(line,"SUBSTANDARD",11) == 0) {
+ if (parse_flag(line, &substandard, "SUBSTANDARD")) {
+ fclose(afflst);
+ return 1;
+ }
+ }
+
if (strncmp(line,"CHECKSHARPS",11) == 0) {
checksharps=1;
}
@@ -941,191 +950,40 @@ int AffixMgr::process_sfx_order()
return 0;
}
-
-
-// takes aff file condition string and creates the
-// conds array - please see the appendix at the end of the
-// file affentry.cxx which describes what is going on here
-// in much more detail
-
-int AffixMgr::encodeit(struct affentry * ptr, char * cs)
+// calculate the character length of the condition
+int AffixMgr::condlen(char * st)
{
- unsigned char c;
- int i, j, k;
- unsigned char mbr[MAXLNLEN];
- w_char wmbr[MAXLNLEN];
- w_char * wpos = wmbr;
-
- // now clear the conditions array */
- for (i=0;i<SETSIZE;i++) ptr->conds.base[i] = (unsigned char) 0;
-
- // now parse the string to create the conds array */
- int nc = strlen(cs);
- unsigned char neg = 0; // complement indicator
- int grp = 0; // group indicator
- unsigned char n = 0; // number of conditions
- int ec = 0; // end condition indicator
- int nm = 0; // number of member in group
-
- // if no condition just return
- if (strcmp(cs,".")==0) {
- ptr->numconds = 0;
- return 0;
+ int l = 0;
+ bool group = false;
+ for(; *st; st++) {
+ if (*st == '[') {
+ group = true;
+ l++;
+ } else if (*st == ']') group = false;
+ else if (!group && (!utf8 ||
+ (!(*st & 0x80) || ((*st & 0xc0) == 0x80)))) l++;
}
+ return l;
+}
- i = 0;
- while (i < nc) {
- c = *((unsigned char *)(cs + i));
-
- // start group indicator
- if (c == '[') {
- grp = 1;
- c = 0;
- }
-
- // complement flag
- if ((grp == 1) && (c == '^')) {
- neg = 1;
- c = 0;
- }
-
- // end goup indicator
- if (c == ']') {
- ec = 1;
- c = 0;
- }
-
- // add character of group to list
- if ((grp == 1) && (c != 0)) {
- *(mbr + nm) = c;
- nm++;
- c = 0;
- }
-
- // end of condition
- if (c != 0) {
- ec = 1;
+int AffixMgr::encodeit(struct affentry * ptr, char * cs)
+{
+ if (strcmp(cs,".") != 0) {
+ ptr->numconds = condlen(cs);
+ strncpy(ptr->c.conds, cs, MAXCONDLEN);
+ // long condition (end of conds padded by strncpy)
+ if (ptr->c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]) {
+ ptr->opts += aeLONGCOND;
+ ptr->c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
}
-
- if (ec) {
- if (!utf8) {
- if (grp == 1) {
- if (neg == 0) {
- // set the proper bits in the condition array vals for those chars
- for (j=0;j<nm;j++) {
- k = (unsigned int) mbr[j];
- ptr->conds.base[k] = ptr->conds.base[k] | ((unsigned char)1 << n);
- }
- } else {
- // complement so set all of them and then unset indicated ones
- for (j=0;j<SETSIZE;j++) ptr->conds.base[j] = ptr->conds.base[j] | ((unsigned char)1 << n);
- for (j=0;j<nm;j++) {
- k = (unsigned int) mbr[j];
- ptr->conds.base[k] = ptr->conds.base[k] & ~((unsigned char)1 << n);
- }
- }
- neg = 0;
- grp = 0;
- nm = 0;
- } else {
- // not a group so just set the proper bit for this char
- // but first handle special case of . inside condition
- if (c == '.') {
- // wild card character so set them all
- for (j=0;j<SETSIZE;j++) ptr->conds.base[j] = ptr->conds.base[j] | ((unsigned char)1 << n);
- } else {
- ptr->conds.base[(unsigned int) c] = ptr->conds.base[(unsigned int)c] | ((unsigned char)1 << n);
- }
- }
- n++;
- ec = 0;
- } else { // UTF-8 character set
- if (grp == 1) {
- ptr->conds.utf8.neg[n] = neg;
- if (neg == 0) {
- // set the proper bits in the condition array vals for those chars
- for (j=0;j<nm;j++) {
- k = (unsigned int) mbr[j];
- if (k >> 7) {
- u8_u16(wpos, 1, (char *) mbr + j);
- wpos++;
- if ((k & 0xe0) == 0xe0) j+=2; else j++; // 3-byte UTF-8 character
- } else {
- ptr->conds.utf8.ascii[k] = ptr->conds.utf8.ascii[k] | ((unsigned char)1 << n);
- }
- }
- } else { // neg == 1
- // complement so set all of them and then unset indicated ones
- for (j=0;j<(SETSIZE/2);j++) ptr->conds.utf8.ascii[j] = ptr->conds.utf8.ascii[j] | ((unsigned char)1 << n);
- for (j=0;j<nm;j++) {
- k = (unsigned int) mbr[j];
- if (k >> 7) {
- u8_u16(wpos, 1, (char *) mbr + j);
- wpos++;
- if ((k & 0xe0) == 0xe0) j+=2; else j++; // 3-byte UTF-8 character
- } else {
- ptr->conds.utf8.ascii[k] = ptr->conds.utf8.ascii[k] & ~((unsigned char)1 << n);
- }
- }
- }
- neg = 0;
- grp = 0;
- nm = 0;
- ptr->conds.utf8.wlen[n] = wpos - wmbr;
- if ((wpos - wmbr) != 0) {
- ptr->conds.utf8.wchars[n] = (w_char *) malloc(sizeof(w_char) * (wpos - wmbr));
- if (!ptr->conds.utf8.wchars[n]) return 1;
- memcpy(ptr->conds.utf8.wchars[n], wmbr, sizeof(w_char) * (wpos - wmbr));
- flag_qsort((unsigned short *) ptr->conds.utf8.wchars[n], 0, ptr->conds.utf8.wlen[n]);
- wpos = wmbr;
- }
- } else { // grp == 0
- // is UTF-8 character?
- if (c >> 7) {
- ptr->conds.utf8.wchars[n] = (w_char *) malloc(sizeof(w_char));
- if (!ptr->conds.utf8.wchars[n]) return 1;
- ptr->conds.utf8.wlen[n] = 1;
- u8_u16(ptr->conds.utf8.wchars[n], 1, cs + i);
- if ((c & 0xe0) == 0xe0) i+=2; else i++; // 3-byte UFT-8 character
- } else {
- ptr->conds.utf8.wchars[n] = NULL;
- // not a group so just set the proper bit for this char
- // but first handle special case of . inside condition
- if (c == '.') {
- ptr->conds.utf8.all[n] = 1;
- // wild card character so set them all
- for (j=0;j<(SETSIZE/2);j++) ptr->conds.utf8.ascii[j] = ptr->conds.utf8.ascii[j] | ((unsigned char)1 << n);
- } else {
- ptr->conds.utf8.all[n] = 0;
- ptr->conds.utf8.ascii[(unsigned int) c] = ptr->conds.utf8.ascii[(unsigned int)c] | ((unsigned char)1 << n);
- }
- }
- neg = 0;
- }
- n++;
- ec = 0;
- neg = 0;
- }
- }
-
- i++;
+ } else {
+ ptr->numconds = 0;
+ ptr->c.conds[0] = '\0';
}
- ptr->numconds = n;
return 0;
}
- // return 1 if s1 is a leading subset of s2
-/* inline int AffixMgr::isSubset(const char * s1, const char * s2)
- {
- while ((*s1 == *s2) && *s1) {
- s1++;
- s2++;
- }
- return (*s1 == '\0');
- }
-*/
-
- // return 1 if s1 is a leading subset of s2 (dots are for infixes)
+// return 1 if s1 is a leading subset of s2 (dots are for infixes)
inline int AffixMgr::isSubset(const char * s1, const char * s2)
{
while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) {
@@ -1235,7 +1093,6 @@ struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
return NULL;
}
-#ifdef HUNSPELL_EXPERIMENTAL
// check word for prefixes
char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound,
const FLAG needflag)
@@ -1331,8 +1188,6 @@ char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
if (*result) return mystrdup(result);
return NULL;
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
-
// Is word a non compound with a REP substitution (see checkcompoundrep)?
int AffixMgr::cpdrep_check(const char * word, int wl)
@@ -1579,7 +1434,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
// search homonym with compound flag
while ((rv) && !hu_mov_rule &&
- ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
+ ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
(compoundbegin && !wordnum &&
TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
@@ -1613,9 +1468,9 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
) checked_prefix = 1;
- // else check forbiddenwords and pseudoroot
+ // else check forbiddenwords and needaffix
} else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
- TESTAFF(rv->astr, pseudoroot, rv->alen) ||
+ TESTAFF(rv->astr, needaffix, rv->alen) ||
(is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
)) {
st[i] = ch;
@@ -1728,7 +1583,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
rv = lookup((word+i)); // perhaps without prefix
// search homonym with compound flag
- while ((rv) && ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
+ while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
(compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
(numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
@@ -1768,7 +1623,7 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
&& (
((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
((cpdmaxsyllable==0) ||
- (numsyllable + get_syllable(&(rv->word), rv->clen)<=cpdmaxsyllable))
+ (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen)<=cpdmaxsyllable))
)
&& (
(!checkcompounddup || (rv != rv_first))
@@ -1901,7 +1756,6 @@ struct hentry * AffixMgr::compound_check(const char * word, int len,
return NULL;
}
-#ifdef HUNSPELL_EXPERIMENTAL
// check if compound word is correctly spelled
// hu_mov_rule = spec. Hungarian rule (XXX)
int AffixMgr::compound_check_morph(const char * word, int len,
@@ -1963,7 +1817,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
// search homonym with compound flag
while ((rv) && !hu_mov_rule &&
- ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
+ ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
(compoundbegin && !wordnum &&
TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
@@ -1977,13 +1831,16 @@ int AffixMgr::compound_check_morph(const char * word, int len,
}
if (rv) {
- if (rv->description) {
- if ((!rv->astr) || !TESTAFF(rv->astr, lemma_present, rv->alen))
- strcat(presult, st);
- strcat(presult, rv->description);
+ sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st);
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, st);
}
- }
-
+ // store the pointer of the hash entry
+// sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, MORPH_HENTRY, rv);
+ if (HENTRY_DATA(rv)) {
+ sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, HENTRY_DATA(rv));
+ }
+ }
if (!rv) {
if (compoundflag &&
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
@@ -2006,35 +1863,28 @@ int AffixMgr::compound_check_morph(const char * word, int len,
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
) {
- //char * p = prefix_check_morph(st, i, 0, compound);
+ // char * p = prefix_check_morph(st, i, 0, compound);
char * p = NULL;
if (compoundflag) p = affix_check_morph(st, i, compoundflag);
if (!p || (*p == '\0')) {
+ if (p) free(p);
+ p = NULL;
if ((wordnum == 0) && compoundbegin) {
p = affix_check_morph(st, i, compoundbegin);
} else if ((wordnum > 0) && compoundmiddle) {
p = affix_check_morph(st, i, compoundmiddle);
}
}
- if (*p != '\0') {
- line_uniq(p);
- if (strchr(p, '\n')) {
- strcat(presult, "(");
- strcat(presult, line_join(p, '|'));
- strcat(presult, ")");
- } else {
- strcat(presult, p);
- }
- }
- if (presult[strlen(presult) - 1] == '\n') {
- presult[strlen(presult) - 1] = '\0';
+ if (p && (*p != '\0')) {
+ sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD,
+ MORPH_PART, st, line_uniq_app(&p, MSEP_REC));
}
+ if (p) free(p);
checked_prefix = 1;
- //strcat(presult, "+");
}
// else check forbiddenwords
} else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
- TESTAFF(rv->astr, pseudoroot, rv->alen))) {
+ TESTAFF(rv->astr, needaffix, rv->alen))) {
st[i] = ch;
continue;
}
@@ -2137,7 +1987,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
rv = lookup((word+i)); // perhaps without prefix
// search homonym with compound flag
- while ((rv) && ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
+ while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
!((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
(compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
(numdefcpd && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
@@ -2146,11 +1996,21 @@ int AffixMgr::compound_check_morph(const char * word, int len,
if (rv && words && words[wnum + 1]) {
strcat(*result, presult);
- if (complexprefixes && rv->description) strcat(*result, rv->description);
- if (rv->description && ((!rv->astr) ||
- !TESTAFF(rv->astr, lemma_present, rv->alen)))
- strcat(*result, &(rv->word));
- if (!complexprefixes && rv->description) strcat(*result, rv->description);
+ strcat(*result, " ");
+ strcat(*result, MORPH_PART);
+ strcat(*result, word+i);
+ if (complexprefixes && HENTRY_DATA(rv)) strcat(*result, HENTRY_DATA(rv));
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ strcat(*result, " ");
+ strcat(*result, MORPH_STEM);
+ strcat(*result, HENTRY_WORD(rv));
+ }
+ // store the pointer of the hash entry
+// sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ strcat(*result, " ");
+ strcat(*result, HENTRY_DATA(rv));
+ }
strcat(*result, "\n");
ok = 1;
return 0;
@@ -2187,7 +2047,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
&& (
((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
((cpdmaxsyllable==0) ||
- (numsyllable+get_syllable(&(rv->word),rv->wlen)<=cpdmaxsyllable))
+ (numsyllable+get_syllable(HENTRY_WORD(rv),rv->blen)<=cpdmaxsyllable))
)
&& (
(!checkcompounddup || (rv != rv_first))
@@ -2196,12 +2056,23 @@ int AffixMgr::compound_check_morph(const char * word, int len,
{
// bad compound word
strcat(*result, presult);
+ strcat(*result, " ");
+ strcat(*result, MORPH_PART);
+ strcat(*result, word+i);
- if (rv->description) {
- if (complexprefixes) strcat(*result, rv->description);
- if ((!rv->astr) || !TESTAFF(rv->astr, lemma_present, rv->alen))
- strcat(*result, &(rv->word));
- if (!complexprefixes) strcat(*result, rv->description);
+ if (HENTRY_DATA(rv)) {
+ if (complexprefixes) strcat(*result, HENTRY_DATA(rv));
+ if (! HENTRY_FIND(rv, MORPH_STEM)) {
+ strcat(*result, " ");
+ strcat(*result, MORPH_STEM);
+ strcat(*result, HENTRY_WORD(rv));
+ }
+ // store the pointer of the hash entry
+// sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
+ if (!complexprefixes) {
+ strcat(*result, " ");
+ strcat(*result, HENTRY_DATA(rv));
+ }
}
strcat(*result, "\n");
ok = 1;
@@ -2227,20 +2098,16 @@ int AffixMgr::compound_check_morph(const char * word, int len,
if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
char * m = NULL;
if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
- if ((!m || *m == '\0') && compoundend)
+ if ((!m || *m == '\0') && compoundend) {
+ if (m) free(m);
m = affix_check_morph((word+i),strlen(word+i), compoundend);
+ }
strcat(*result, presult);
- if (m) {
- line_uniq(m);
- if (strchr(m, '\n')) {
- strcat(*result, "(");
- strcat(*result, line_join(m, '|'));
- strcat(*result, ")");
- } else {
- strcat(*result, m);
- }
- free(m);
+ if (m || (*m != '\0')) {
+ sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
+ MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
}
+ if (m) free(m);
strcat(*result, "\n");
ok = 1;
}
@@ -2259,7 +2126,7 @@ int AffixMgr::compound_check_morph(const char * word, int len,
// check forbiddenwords
if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen))
- && (! TESTAFF(rv->astr, pseudoroot, rv->alen))) {
+ && (! TESTAFF(rv->astr, needaffix, rv->alen))) {
st[i] = ch;
continue;
}
@@ -2311,21 +2178,17 @@ int AffixMgr::compound_check_morph(const char * word, int len,
)) {
char * m = NULL;
if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
- if ((!m || *m == '\0') && compoundend)
+ if ((!m || *m == '\0') && compoundend) {
+ if (m) free(m);
m = affix_check_morph((word+i),strlen(word+i), compoundend);
+ }
strcat(*result, presult);
- if (m) {
- line_uniq(m);
- if (strchr(m, '\n')) {
- strcat(*result, "(");
- strcat(*result, line_join(m, '|'));
- strcat(*result, ")");
- } else {
- strcat(*result, m);
- }
- free(m);
+ if (m && (*m != '\0')) {
+ sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
+ MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
}
- strcat(*result, "\n");
+ if (m) free(m);
+ sprintf(*result + strlen(*result), "%c", MSEP_REC);
ok = 1;
}
@@ -2346,7 +2209,6 @@ int AffixMgr::compound_check_morph(const char * word, int len,
}
return 0;
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
// return 1 if s1 (reversed) is a leading subset of end of s2
/* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
@@ -2402,11 +2264,11 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
// fogemorpheme
(in_compound ||
!((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
- // pseudoroot on prefix or first suffix
+ // needaffix on prefix or first suffix
(cclass ||
- !(se->getCont() && TESTAFF(se->getCont(), pseudoroot, se->getContLen())) ||
+ !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
(ppfx && !((ep->getCont()) &&
- TESTAFF(ep->getCont(), pseudoroot,
+ TESTAFF(ep->getCont(), needaffix,
ep->getContLen())))
)
) {
@@ -2444,11 +2306,11 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
// fogemorpheme
(in_compound ||
!((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
- // pseudoroot on prefix or first suffix
+ // needaffix on prefix or first suffix
(cclass ||
- !(sptr->getCont() && TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())) ||
+ !(sptr->getCont() && TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
(ppfx && !((ep->getCont()) &&
- TESTAFF(ep->getCont(), pseudoroot,
+ TESTAFF(ep->getCont(), needaffix,
ep->getContLen())))
)
) {
@@ -2462,9 +2324,15 @@ struct hentry * AffixMgr::suffix_check (const char * word, int len,
if (!derived) {
derived = mystrdup(word);
} else {
+ strcat(result, " ");
+ strcpy(result, MORPH_STEM);
strcpy(result, derived); // XXX check size
strcat(result, "\n");
+ strcat(result, " ");
+ strcat(result, MORPH_STEM);
strcat(result, word);
+ // store the pointer of the hash entry
+// sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
free(derived);
derived = mystrdup(result);
}
@@ -2523,7 +2391,6 @@ struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len,
return NULL;
}
-#ifdef HUNSPELL_EXPERIMENTAL
char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
int sfxopts, AffEntry * ppfx, const FLAG needflag)
{
@@ -2545,11 +2412,17 @@ char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
st = se->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
if (st) {
if (ppfx) {
- if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
+ if (((PfxEntry *) ppfx)->getMorph()) {
+ strcat(result, ((PfxEntry *) ppfx)->getMorph());
+ strcat(result, " ");
+ }
}
strcat(result, st);
free(st);
- if (se->getMorph()) strcat(result, se->getMorph());
+ if (se->getMorph()) {
+ strcat(result, " ");
+ strcat(result, se->getMorph());
+ }
strcat(result, "\n");
}
}
@@ -2581,7 +2454,10 @@ char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
} else sprintf(result3, "<%c>", flag);
strcat(result3, ":");
#endif
- if (sptr->getMorph()) strcat(result3, sptr->getMorph());
+ if (sptr->getMorph()) {
+ strcat(result3, " ");
+ strcat(result3, sptr->getMorph());
+ }
strlinecat(result2, result3);
strcat(result2, "\n");
strcat(result, result2);
@@ -2627,25 +2503,39 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
// fogemorpheme
(in_compound ||
!((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
- // pseudoroot on prefix or first suffix
+ // needaffix on prefix or first suffix
(cclass ||
- !(se->getCont() && TESTAFF(se->getCont(), pseudoroot, se->getContLen())) ||
+ !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
(ppfx && !((ep->getCont()) &&
- TESTAFF(ep->getCont(), pseudoroot,
+ TESTAFF(ep->getCont(), needaffix,
ep->getContLen())))
)
))
rv = se->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
while (rv) {
if (ppfx) {
- if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
+ if (((PfxEntry *) ppfx)->getMorph()) {
+ strcat(result, ((PfxEntry *) ppfx)->getMorph());
+ strcat(result, " ");
+ }
+ }
+ if (complexprefixes && HENTRY_DATA(rv)) strcat(result, HENTRY_DATA(rv));
+ if (! HENTRY_FIND(rv, MORPH_STEM)) {
+ strcat(result, " ");
+ strcat(result, MORPH_STEM);
+ strcat(result, HENTRY_WORD(rv));
+ }
+ // store the pointer of the hash entry
+// sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
+
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ strcat(result, " ");
+ strcat(result, HENTRY_DATA(rv));
+ }
+ if (se->getMorph()) {
+ strcat(result, " ");
+ strcat(result, se->getMorph());
}
- if (complexprefixes && rv->description) strcat(result, rv->description);
- if (rv->description && ((!rv->astr) ||
- !TESTAFF(rv->astr, lemma_present, rv->alen)))
- strcat(result, &(rv->word));
- if (!complexprefixes && rv->description) strcat(result, rv->description);
- if (se->getMorph()) strcat(result, se->getMorph());
strcat(result, "\n");
rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
}
@@ -2676,18 +2566,30 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
// fogemorpheme
(in_compound ||
!((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
- // pseudoroot on first suffix
+ // needaffix on first suffix
(cclass || !(sptr->getCont() &&
- TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())))
+ TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())))
)) rv = sptr->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
while (rv) {
if (ppfx) {
- if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
+ if (((PfxEntry *) ppfx)->getMorph()) {
+ strcat(result, ((PfxEntry *) ppfx)->getMorph());
+ strcat(result, " ");
+ }
}
- if (complexprefixes && rv->description) strcat(result, rv->description);
- if (rv->description && ((!rv->astr) ||
- !TESTAFF(rv->astr, lemma_present, rv->alen))) strcat(result, &(rv->word));
- if (!complexprefixes && rv->description) strcat(result, rv->description);
+ if (complexprefixes && HENTRY_DATA(rv)) strcat(result, HENTRY_DATA(rv));
+ if (! HENTRY_FIND(rv, MORPH_STEM)) {
+ strcat(result, " ");
+ strcat(result, MORPH_STEM);
+ strcat(result, HENTRY_WORD(rv));
+ }
+ // store the pointer of the hash entry
+// sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
+
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ strcat(result, " ");
+ strcat(result, HENTRY_DATA(rv));
+ }
#ifdef DEBUG
unsigned short flag = sptr->getFlag();
if (flag_mode == FLAG_NUM) {
@@ -2698,7 +2600,10 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
strcat(result, ":");
#endif
- if (sptr->getMorph()) strcat(result, sptr->getMorph());
+ if (sptr->getMorph()) {
+ strcat(result, " ");
+ strcat(result, sptr->getMorph());
+ }
strcat(result, "\n");
rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
}
@@ -2711,8 +2616,6 @@ char * AffixMgr::suffix_check_morph(const char * word, int len,
if (*result) return mystrdup(result);
return NULL;
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
-
// check if word with affixes is correctly spelled
struct hentry * AffixMgr::affix_check (const char * word, int len, const FLAG needflag, char in_compound)
@@ -2741,7 +2644,6 @@ struct hentry * AffixMgr::affix_check (const char * word, int len, const FLAG ne
return rv;
}
-#ifdef HUNSPELL_EXPERIMENTAL
// check if word with affixes is correctly spelled
char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needflag, char in_compound)
{
@@ -2781,20 +2683,95 @@ char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needfl
free(st);
}
}
-
+
return mystrdup(result);
}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
+
+char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
+ unsigned short al, char * morph, char * targetmorph, int level)
+{
+ // handle suffixes
+ char * stemmorph;
+ char * stemmorphcatpos;
+ char mymorph[MAXLNLEN];
+
+ if (!morph && !targetmorph) return NULL;
+
+ // check substandard flag
+ if (TESTAFF(ap, substandard, al)) return NULL;
+
+ if (morphcmp(morph, targetmorph) == 0) return mystrdup(ts);
+
+// int targetcount = get_sfxcount(targetmorph);
+
+ // use input suffix fields, if exist
+ if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
+ stemmorph = mymorph;
+ strcpy(stemmorph, morph);
+ strcat(stemmorph, " ");
+ stemmorphcatpos = stemmorph + strlen(stemmorph);
+ } else {
+ stemmorph = morph;
+ stemmorphcatpos = NULL;
+ }
+
+ for (int i = 0; i < al; i++) {
+ const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
+ SfxEntry * sptr = (SfxEntry *)sFlag[c];
+ while (sptr) {
+ if (sptr->getFlag() == ap[i] && ((sptr->getContLen() == 0) ||
+ // don't generate forms with substandard affixes
+ !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) {
+
+ if (stemmorphcatpos) strcpy(stemmorphcatpos, sptr->getMorph());
+ else stemmorph = (char *) sptr->getMorph();
+
+ int cmp = morphcmp(stemmorph, targetmorph);
+
+ if (cmp == 0) {
+ char * newword = sptr->add(ts, wl);
+ if (newword) {
+ hentry * check = pHMgr->lookup(newword);
+ if (!check || !check->astr ||
+ !TESTAFF(check->astr, forbiddenword, check->alen)) {
+ return newword;
+ }
+ free(newword);
+ }
+ }
+
+ // recursive call for secondary suffixes
+ if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
+// (get_sfxcount(stemmorph) < targetcount) &&
+ !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) {
+ char * newword = sptr->add(ts, wl);
+ if (newword) {
+ char * newword2 = morphgen(newword, strlen(newword), sptr->getCont(),
+ sptr->getContLen(), stemmorph, targetmorph, 1);
+
+ if (newword2) {
+ free(newword);
+ return newword2;
+ }
+ free(newword);
+ newword = NULL;
+ }
+ }
+ }
+ sptr = (SfxEntry *)sptr ->getFlgNxt();
+ }
+ }
+ return NULL;
+}
int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
int wl, const unsigned short * ap, unsigned short al, char * bad, int badl,
char * phone)
{
-
int nh=0;
// first add root word to list
- if ((nh < maxn) && !(al && ((pseudoroot && TESTAFF(ap, pseudoroot, al)) ||
+ if ((nh < maxn) && !(al && ((needaffix && TESTAFF(ap, needaffix, al)) ||
(onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
wlst[nh].word = mystrdup(ts);
wlst[nh].allow = (1 == 0);
@@ -2816,9 +2793,9 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts
while (sptr) {
if ((sptr->getFlag() == ap[i]) && (!sptr->getKeyLen() || ((badl > sptr->getKeyLen()) &&
(strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
- // check pseudoroot flag
- !(sptr->getCont() && ((pseudoroot &&
- TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())) ||
+ // check needaffix flag
+ !(sptr->getCont() && ((needaffix &&
+ TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
(circumfix &&
TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
(onlyincompound &&
@@ -2888,9 +2865,9 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts
while (ptr) {
if ((ptr->getFlag() == ap[m]) && (!ptr->getKeyLen() || ((badl > ptr->getKeyLen()) &&
(strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
- // check pseudoroot flag
- !(ptr->getCont() && ((pseudoroot &&
- TESTAFF(ptr->getCont(), pseudoroot, ptr->getContLen())) ||
+ // check needaffix flag
+ !(ptr->getCont() && ((needaffix &&
+ TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) ||
(circumfix &&
TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||
(onlyincompound &&
@@ -2915,8 +2892,6 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts
return nh;
}
-
-
// return length of replacing table
int AffixMgr::get_numrep()
{
@@ -3059,9 +3034,9 @@ FLAG AffixMgr::get_nosuggest()
}
// return the forbidden words flag modify flag
-FLAG AffixMgr::get_pseudoroot()
+FLAG AffixMgr::get_needaffix()
{
- return pseudoroot;
+ return needaffix;
}
// return the onlyincompound flag
@@ -3159,7 +3134,7 @@ int AffixMgr::get_sugswithdots(void)
/* parse flag */
int AffixMgr::parse_flag(char * line, unsigned short * out, const char * name) {
char * s = NULL;
- if (*out != FLAG_NULL) {
+ if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) {
HUNSPELL_WARNING(stderr, "error: duplicate %s line\n", name);
return 1;
}
@@ -3216,7 +3191,7 @@ int AffixMgr::parse_cpdsyllable(char * line)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np < 2) {
@@ -3247,7 +3222,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
numrep = atoi(piece);
if (numrep < 1) {
HUNSPELL_WARNING(stderr, "incorrect number of entries in replacement table\n");
- free(piece);
+ // free(piece);
return 1;
}
reptable = (replentry *) malloc(numrep * sizeof(struct replentry));
@@ -3259,7 +3234,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
@@ -3284,7 +3259,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
if (strncmp(piece,"REP",3) != 0) {
HUNSPELL_WARNING(stderr, "error: replacement table is corrupt\n");
numrep = 0;
- free(piece);
+ // free(piece);
return 1;
}
break;
@@ -3295,7 +3270,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
@@ -3331,7 +3306,7 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
if (!phone) return 1;
if (phone->num < 1) {
HUNSPELL_WARNING(stderr, "incorrect number of entries in phonelacement table\n");
- free(piece);
+ // free(piece);
return 1;
}
phone->rules = (char * *) malloc(2 * (phone->num + 1) * sizeof(char *));
@@ -3343,7 +3318,7 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
@@ -3368,7 +3343,7 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
if (strncmp(piece,"PHONE",5) != 0) {
HUNSPELL_WARNING(stderr, "error: PHONE table is corrupt\n");
phone->num = 0;
- free(piece);
+ // free(piece);
return 1;
}
break;
@@ -3379,7 +3354,7 @@ int AffixMgr::parse_phonetable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if ((!(phone->rules[j * 2])) || (!(phone->rules[j * 2 + 1]))) {
@@ -3414,7 +3389,7 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
numcheckcpd = atoi(piece);
if (numcheckcpd < 1) {
HUNSPELL_WARNING(stderr, "incorrect number of entries in compound pattern table\n");
- free(piece);
+ // free(piece);
return 1;
}
checkcpdtable = (replentry *) malloc(numcheckcpd * sizeof(struct replentry));
@@ -3426,7 +3401,7 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
@@ -3451,7 +3426,7 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
if (strncmp(piece,"CHECKCOMPOUNDPATTERN",20) != 0) {
HUNSPELL_WARNING(stderr, "error: compound pattern table is corrupt\n");
numcheckcpd = 0;
- free(piece);
+ // free(piece);
return 1;
}
break;
@@ -3462,7 +3437,7 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) {
@@ -3494,7 +3469,7 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af)
numdefcpd = atoi(piece);
if (numdefcpd < 1) {
HUNSPELL_WARNING(stderr, "incorrect number of entries in compound rule table\n");
- free(piece);
+ // free(piece);
return 1;
}
defcpdtable = (flagentry *) malloc(numdefcpd * sizeof(flagentry));
@@ -3506,7 +3481,7 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
@@ -3529,7 +3504,7 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af)
case 0: {
if (strncmp(piece, "COMPOUNDRULE", 12) != 0) {
HUNSPELL_WARNING(stderr, "error: compound rule table is corrupt\n");
- free(piece);
+ // free(piece);
numdefcpd = 0;
return 1;
}
@@ -3544,7 +3519,7 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (!defcpdtable[j].len) {
@@ -3577,7 +3552,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
nummap = atoi(piece);
if (nummap < 1) {
HUNSPELL_WARNING(stderr, "incorrect number of entries in map table\n");
- free(piece);
+ // free(piece);
return 1;
}
maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry));
@@ -3589,7 +3564,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
@@ -3614,7 +3589,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
if (strncmp(piece,"MAP",3) != 0) {
HUNSPELL_WARNING(stderr, "error: map table is corrupt\n");
nummap = 0;
- free(piece);
+ // free(piece);
return 1;
}
break;
@@ -3642,7 +3617,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if ((!(maptable[j].set || maptable[j].set_utf16)) || (!(maptable[j].len))) {
@@ -3674,7 +3649,7 @@ int AffixMgr::parse_breaktable(char * line, FILE * af)
numbreak = atoi(piece);
if (numbreak < 1) {
HUNSPELL_WARNING(stderr, "incorrect number of entries in BREAK table\n");
- free(piece);
+ // free(piece);
return 1;
}
breaktable = (char **) malloc(numbreak * sizeof(char *));
@@ -3686,7 +3661,7 @@ int AffixMgr::parse_breaktable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
@@ -3708,7 +3683,7 @@ int AffixMgr::parse_breaktable(char * line, FILE * af)
case 0: {
if (strncmp(piece,"BREAK",5) != 0) {
HUNSPELL_WARNING(stderr, "error: BREAK table is corrupt\n");
- free(piece);
+ // free(piece);
numbreak = 0;
return 1;
}
@@ -3722,7 +3697,7 @@ int AffixMgr::parse_breaktable(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (!breaktable) {
@@ -3734,6 +3709,31 @@ int AffixMgr::parse_breaktable(char * line, FILE * af)
return 0;
}
+void AffixMgr::reverse_condition(char * piece) {
+ int neg = 0;
+ for (char * k = piece + strlen(piece) - 1; k >= piece; k--) {
+ switch(*k) {
+ case '[': {
+ if (neg) *(k+1) = '['; else *k = ']';
+ break;
+ }
+ case ']': {
+ *k = '[';
+ if (neg) *(k+1) = '^';
+ neg = 0;
+ break;
+ }
+ case '^': {
+ if (*(k+1) == ']') neg = 1; else *(k+1) = *k;
+ break;
+ }
+ default: {
+ if (neg) *(k+1) = *k;
+ }
+ }
+ }
+}
+
int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflags)
{
int numents = 0; // number of affentry structures to parse
@@ -3795,9 +3795,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
ptr->opts = ff;
if (utf8) ptr->opts += aeUTF8;
if (pHMgr->is_aliasf()) ptr->opts += aeALIASF;
-#ifdef HUNSPELL_EXPERIMENTAL
if (pHMgr->is_aliasm()) ptr->opts += aeALIASM;
-#endif
ptr->aflag = aflag;
}
@@ -3805,7 +3803,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
// check to make sure we parsed enough pieces
@@ -3836,7 +3834,8 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
// piece 1 - is type
case 0: {
np++;
- if (nptr != ptr) nptr->opts = ptr->opts;
+ if (nptr != ptr) nptr->opts = ptr->opts &
+ (aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
break;
}
@@ -3848,7 +3847,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
HUNSPELL_WARNING(stderr, "error: affix %s is corrupt near line %s\n", err, nl);
HUNSPELL_WARNING(stderr, "error: possible incorrect count\n");
free(err);
- free(piece);
+ // free(piece);
return 1;
}
@@ -3875,9 +3874,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
// piece 4 - is affix string or 0 for null
case 3: {
char * dash;
-#ifdef HUNSPELL_EXPERIMENTAL
nptr->morphcode = NULL;
-#endif
nptr->contclass = NULL;
nptr->contclasslen = 0;
np++;
@@ -3939,59 +3936,44 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
case 4: {
np++;
if (complexprefixes) {
- int neg = 0;
if (utf8) reverseword_utf(piece); else reverseword(piece);
- // reverse condition
- for (char * k = piece + strlen(piece) - 1; k >= piece; k--) {
- switch(*k) {
- case '[': {
- if (neg) *(k+1) = '['; else *k = ']';
- break;
- }
- case ']': {
- *k = '[';
- if (neg) *(k+1) = '^';
- neg = 0;
- break;
- }
- case '^': {
- if (*(k+1) == ']') neg = 1; else *(k+1) = *k;
- break;
- }
- default: {
- if (neg) *(k+1) = *k;
- }
- }
- }
+ reverse_condition(piece);
}
if (nptr->stripl && (strcmp(piece, ".") != 0) &&
redundant_condition(at, nptr->strip, nptr->stripl, piece, nl))
strcpy(piece, ".");
- if (encodeit(nptr,piece)) return 1;
+ if (at == 'S') {
+ reverseword(piece);
+ reverse_condition(piece);
+ }
+ if (encodeit(nptr, piece)) return 1;
break;
}
-#ifdef HUNSPELL_EXPERIMENTAL
case 5: {
np++;
if (pHMgr->is_aliasm()) {
int index = atoi(piece);
nptr->morphcode = pHMgr->get_aliasm(index);
} else {
- if (complexprefixes) {
+ if (complexprefixes) { // XXX - fix me for morph. gen.
if (utf8) reverseword_utf(piece); else reverseword(piece);
}
- nptr->morphcode = mystrdup(piece);
+ // add the remaining of the line
+ if (*tp) {
+ *(tp - 1) = ' ';
+ tp = tp + strlen(tp);
+ }
+ nptr->morphcode = (char *) malloc(strlen(piece)+1);
+ strcpy(nptr->morphcode, piece);
}
break;
}
-#endif
-
default: break;
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
// check to make sure we parsed enough pieces
@@ -4004,7 +3986,6 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
}
#ifdef DEBUG
-#ifdef HUNSPELL_EXPERIMENTAL
// detect unnecessary fields, excepting comments
if (basefieldnum) {
int fieldnum = !(nptr->morphcode) ? 5 : ((*(nptr->morphcode)=='#') ? 5 : 6);
@@ -4014,7 +3995,6 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag
basefieldnum = !(nptr->morphcode) ? 5 : ((*(nptr->morphcode)=='#') ? 5 : 6);
}
#endif
-#endif
nptr++;
}
diff --git a/src/myspell/affixmgr.hxx b/src/myspell/affixmgr.hxx
index 37ae20e..644d2c9 100644
--- a/src/myspell/affixmgr.hxx
+++ b/src/myspell/affixmgr.hxx
@@ -46,7 +46,7 @@ class AffixMgr
int checkcompoundtriple;
FLAG forbiddenword;
FLAG nosuggest;
- FLAG pseudoroot;
+ FLAG needaffix;
int cpdmin;
int numrep;
replentry * reptable;
@@ -88,6 +88,7 @@ class AffixMgr
FLAG circumfix;
FLAG onlyincompound;
FLAG keepcase;
+ FLAG substandard;
int checksharps;
int havecontclass; // boolean variable
@@ -99,48 +100,55 @@ public:
AffixMgr(const char * affpath, HashMgr * ptr);
~AffixMgr();
struct hentry * affix_check(const char * word, int len,
- const unsigned short needflag = (unsigned short) 0, char in_compound = IN_CPD_NOT);
+ const unsigned short needflag = (unsigned short) 0,
+ char in_compound = IN_CPD_NOT);
struct hentry * prefix_check(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
inline int isSubset(const char * s1, const char * s2);
struct hentry * prefix_check_twosfx(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
- struct hentry * suffix_check(const char * word, int len, int sfxopts, AffEntry* ppfx,
- char ** wlst, int maxSug, int * ns, const FLAG cclass = FLAG_NULL,
- const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+ struct hentry * suffix_check(const char * word, int len, int sfxopts,
+ AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
+ const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
struct hentry * suffix_check_twosfx(const char * word, int len,
int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
char * affix_check_morph(const char * word, int len,
- const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+ const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
char * prefix_check_morph(const char * word, int len,
- char in_compound, const FLAG needflag = FLAG_NULL);
- char * suffix_check_morph (const char * word, int len, int sfxopts, AffEntry * ppfx,
- const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
+ char in_compound, const FLAG needflag = FLAG_NULL);
+ char * suffix_check_morph (const char * word, int len, int sfxopts,
+ AffEntry * ppfx, const FLAG cclass = FLAG_NULL,
+ const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
char * prefix_check_twosfx_morph(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
char * suffix_check_twosfx_morph(const char * word, int len,
int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL);
- int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
- int wl, const unsigned short * ap, unsigned short al, char * bad, int,
- char *);
+ char * morphgen(char * ts, int wl, const unsigned short * ap,
+ unsigned short al, char * morph, char * targetmorph, int level);
+
+ int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
+ int wl, const unsigned short * ap, unsigned short al, char * bad,
+ int, char *);
- short get_syllable (const char * word, int wlen);
- int cpdrep_check(const char * word, int len);
- int cpdpat_check(const char * word, int len);
- int defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** rwords, char all);
- int cpdcase_check(const char * word, int len);
- inline int candidate_check(const char * word, int len);
- struct hentry * compound_check(const char * word, int len,
- short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
- char hu_mov_rule, int * cmpdstemnum, int * cmpdstem, char is_sug);
+ short get_syllable (const char * word, int wlen);
+ int cpdrep_check(const char * word, int len);
+ int cpdpat_check(const char * word, int len);
+ int defcpd_check(hentry *** words, short wnum, hentry * rv,
+ hentry ** rwords, char all);
+ int cpdcase_check(const char * word, int len);
+ inline int candidate_check(const char * word, int len);
+ struct hentry * compound_check(const char * word, int len, short wordnum,
+ short numsyllable, short maxwordnum, short wnum, hentry ** words,
+ char hu_mov_rule, int * cmpdstemnum, int * cmpdstem, char is_sug);
- int compound_check_morph(const char * word, int len,
- short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
- char hu_mov_rule, char ** result, char * partresult);
+ int compound_check_morph(const char * word, int len, short wordnum,
+ short numsyllable, short maxwordnum, short wnum, hentry ** words,
+ char hu_mov_rule, char ** result, char * partresult);
struct hentry * lookup(const char * word);
int get_numrep();
@@ -164,7 +172,7 @@ public:
FLAG get_forbiddenword();
FLAG get_nosuggest();
// FLAG get_circumfix();
- FLAG get_pseudoroot();
+ FLAG get_needaffix();
FLAG get_onlyincompound();
FLAG get_compoundroot();
FLAG get_lemma_present();
@@ -186,11 +194,8 @@ public:
private:
int parse_file(const char * affpath);
-// int parse_string(char * line, char ** out, const char * name);
int parse_flag(char * line, unsigned short * out, const char * name);
int parse_num(char * line, int * out, const char * name);
-// int parse_array(char * line, char ** out, unsigned short ** out_utf16,
-// int * out_utf16_len, const char * name);
int parse_cpdsyllable(char * line);
int parse_reptable(char * line, FILE * af);
int parse_phonetable(char * line, FILE * af);
@@ -200,6 +205,8 @@ private:
int parse_defcpdtable(char * line, FILE * af);
int parse_affix(char * line, const char at, FILE * af, char * dupflags);
+ void reverse_condition(char *);
+ int condlen(char *);
int encodeit(struct affentry * ptr, char * cs);
int build_pfxtree(AffEntry* pfxptr);
int build_sfxtree(AffEntry* sfxptr);
@@ -209,7 +216,8 @@ private:
AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr);
int process_pfx_tree_to_list();
int process_sfx_tree_to_list();
- int redundant_condition(char, char * strip, int stripl, const char * cond, char *);
+ int redundant_condition(char, char * strip, int stripl,
+ const char * cond, char *);
};
#endif
diff --git a/src/myspell/atypes.hxx b/src/myspell/atypes.hxx
index 009e85a..0afb345 100644
--- a/src/myspell/atypes.hxx
+++ b/src/myspell/atypes.hxx
@@ -26,7 +26,7 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
#define aeUTF8 (1 << 1)
#define aeALIASF (1 << 2)
#define aeALIASM (1 << 3)
-#define aeINFIX (1 << 4)
+#define aeLONGCOND (1 << 4)
// compound options
#define IN_CPD_NOT 0
@@ -38,6 +38,8 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
#define MINCPDLEN 3
#define MAXCOMPOUND 10
+#define MAXCONDLEN 20
+#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *))
#define MAXACC 1000
@@ -56,21 +58,16 @@ struct affentry
char numconds;
char opts;
unsigned short aflag;
- union {
- char base[SETSIZE];
- struct {
- char ascii[SETSIZE/2];
- char neg[8];
- char all[8];
- w_char * wchars[8];
- int wlen[8];
- } utf8;
- } conds;
-#ifdef HUNSPELL_EXPERIMENTAL
- char * morphcode;
-#endif
unsigned short * contclass;
short contclasslen;
+ union {
+ char conds[MAXCONDLEN];
+ struct {
+ char conds1[MAXCONDLEN_1];
+ char * conds2;
+ } l;
+ } c;
+ char * morphcode;
};
struct mapentry {
@@ -91,8 +88,3 @@ struct guessword {
};
#endif
-
-
-
-
-
diff --git a/src/myspell/baseaffix.hxx b/src/myspell/baseaffix.hxx
index d6a5cd6..03a876d 100644
--- a/src/myspell/baseaffix.hxx
+++ b/src/myspell/baseaffix.hxx
@@ -6,26 +6,23 @@ class AffEntry
public:
protected:
- char * appnd;
- char * strip;
- unsigned char appndl;
- unsigned char stripl;
- char numconds;
- char opts;
- unsigned short aflag;
- union {
- char base[SETSIZE];
- struct {
- char ascii[SETSIZE/2];
- char neg[8];
- char all[8];
- w_char * wchars[8];
- int wlen[8];
- } utf8;
- } conds;
- char * morphcode;
- unsigned short * contclass;
- short contclasslen;
+ char * appnd;
+ char * strip;
+ unsigned char appndl;
+ unsigned char stripl;
+ char numconds;
+ char opts;
+ unsigned short aflag;
+ union {
+ char conds[MAXCONDLEN];
+ struct {
+ char conds1[MAXCONDLEN_1];
+ char * conds2;
+ } l;
+ } c;
+ char * morphcode;
+ unsigned short * contclass;
+ short contclasslen;
};
#endif
diff --git a/src/myspell/csutil.cxx b/src/myspell/csutil.cxx
index c56f493..6914957 100644
--- a/src/myspell/csutil.cxx
+++ b/src/myspell/csutil.cxx
@@ -125,7 +125,7 @@ int u8_u16(w_char * dest, int size, const char * src) {
case 0x90:
case 0xa0:
case 0xb0: {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %d. character position\n%s\n", u8 - src, src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - src), src);
u2->h = 0xff;
u2->l = 0xfd;
break;
@@ -137,7 +137,7 @@ int u8_u16(w_char * dest, int size, const char * src) {
u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
u8++;
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
@@ -151,12 +151,12 @@ int u8_u16(w_char * dest, int size, const char * src) {
u2->l = (*u8 << 6) + (*(u8+1) & 0x3f);
u8++;
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
} else {
- HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %d. character position:\n%s\n", u8 - src, src);
+ HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src);
u2->h = 0xff;
u2->l = 0xfd;
}
@@ -221,13 +221,11 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, int length) {
char * mystrsep(char ** stringp, const char delim)
{
- char * rv = NULL;
char * mp = *stringp;
- int n = strlen(mp);
- if (n > 0) {
+ if (*mp != '\0') {
char * dp;
if (delim) {
- dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
+ dp = strchr(mp, delim);
} else {
// don't use isspace() here, the string can be in some random charset
// that's way different than the locale's
@@ -237,20 +235,11 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, int length) {
if (dp) {
*stringp = dp+1;
int nc = (int)((unsigned long)dp - (unsigned long)mp);
- rv = (char *) malloc(nc+1);
- if (rv) {
- memcpy(rv,mp,nc);
- *(rv+nc) = '\0';
- return rv;
- }
+ *(mp+nc) = '\0';
+ return mp;
} else {
- rv = (char *) malloc(n+1);
- if (rv) {
- memcpy(rv, mp, n);
- *(rv+n) = '\0';
- *stringp = mp + n;
- return rv;
- }
+ *stringp = mp + strlen(mp);
+ return mp;
}
}
return NULL;
@@ -296,112 +285,248 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, int length) {
return d;
}
-#ifdef HUNSPELL_EXPERIMENTAL
- // append s to ends of every lines in text
- void strlinecat(char * dest, const char * s)
- {
- char * dup = mystrdup(dest);
- char * source = dup;
- int len = strlen(s);
- while (*source) {
- if (*source == '\n') {
- strncpy(dest, s, len);
- dest += len;
- }
- *dest = *source;
- source++; dest++;
- }
- strcpy(dest, s);
- free(dup);
- }
-
// break text to lines
// return number of lines
-int line_tok(const char * text, char *** lines) {
+// Every non-empty piece of text between breakchar separators is copied into
+// a newly allocated array stored in *lines; empty pieces are skipped.
+// When 0 is returned no array is handed to the caller and *lines is NULL.
+int line_tok(const char * text, char *** lines, char breakchar) {
int linenum = 0;
char * dup = mystrdup(text);
+ // strchr() below must not be handed a NULL working copy
+ if (!dup) {
+ *lines = NULL;
+ return 0;
+ }
- char * p = strchr(dup, '\n');
+ char * p = strchr(dup, breakchar);
while (p) {
linenum++;
*p = '\0';
p++;
- p = strchr(p, '\n');
+ p = strchr(p, breakchar);
+ }
+ linenum++;
+ *lines = (char **) malloc(linenum * sizeof(char *));
+ if (!(*lines)) {
+ free(dup);
+ return 0;
}
- *lines = (char **) calloc(linenum + 1, sizeof(char *));
- if (!(*lines)) return -1;
- p = dup;
- for (int i = 0; i < linenum + 1; i++) {
- (*lines)[i] = mystrdup(p);
+ p = dup;
+ int l = 0;
+ for (int i = 0; i < linenum; i++) {
+ if (*p != '\0') {
+ (*lines)[l] = mystrdup(p);
+ l++;
+ }
p += strlen(p) + 1;
}
free(dup);
- return linenum;
+ // nothing stored: release the array and do not leave a dangling pointer
+ if (!l) {
+ free(*lines);
+ *lines = NULL;
+ }
+ return l;
}
// uniq line in place
-char * line_uniq(char * text) {
+// Pieces of text are separated by breakchar; only the first occurrence of
+// each piece is kept. text is rewritten in place and returned.
+char * line_uniq(char * text, char breakchar) {
char ** lines;
- int linenum = line_tok(text, &lines);
+ int linenum = line_tok(text, &lines, breakchar);
+ // line_tok() hands out no usable array when it returns 0, so lines[0]
+ // must not be read or freed here; text is left as it is
+ if (linenum < 1) return text;
int i;
strcpy(text, lines[0]);
- for ( i = 1; i<=linenum; i++ ) {
+ for ( i = 1; i < linenum; i++ ) {
int dup = 0;
for (int j = 0; j < i; j++) {
if (strcmp(lines[i], lines[j]) == 0) dup = 1;
}
if (!dup) {
- if ((i > 1) || (*(lines[0]) != '\0')) strcat(text, "\n");
+ if ((i > 1) || (*(lines[0]) != '\0')) {
+ sprintf(text + strlen(text), "%c", breakchar);
+ }
strcat(text, lines[i]);
}
}
- for ( i = 0; i<=linenum; i++ ) {
+ for ( i = 0; i < linenum; i++ ) {
if (lines[i]) free(lines[i]);
}
if (lines) free(lines);
return text;
}
+// Uniq and boundary for compound analysis: the breakchar-separated pieces of
+// *text are rewritten as one alternation, e.g. "1\n2\n1" -> " ( 1 | 2 ) ".
+//  - text that contains no breakchar is returned untouched;
+//  - if only one distinct piece remains, it is copied back into *text;
+//  - otherwise *text is freed and replaced by a newly malloc'ed string
+//    (if that allocation fails, *text is left unchanged).
+// Returns *text.
+char * line_uniq_app(char ** text, char breakchar) {
+    if (!strchr(*text, breakchar)) {
+        return *text;
+    }
+
+    char ** lines;
+    int linenum = line_tok(*text, &lines, breakchar);
+    int dup = 0;
+    for (int i = 0; i < linenum; i++) {
+        // compare with every earlier piece, the directly preceding one
+        // included; pieces blanked as duplicates are empty and cannot match
+        // because line_tok() only stores non-empty pieces
+        for (int j = 0; j < i; j++) {
+            if (strcmp(lines[i], lines[j]) == 0) {
+                *(lines[i]) = '\0';
+                dup++;
+                break;
+            }
+        }
+    }
+    if ((linenum - dup) == 1) {
+        strcpy(*text, lines[0]);
+        freelist(&lines, linenum);
+        return *text;
+    }
+    // worst case: " ( " (3 bytes), then each piece followed by " | "
+    // (3 bytes per piece), then the terminating NUL; the pieces themselves
+    // never add up to more than strlen(*text)
+    char * newtext = (char *) malloc(strlen(*text) + 3 * linenum + 3 + 1);
+    if (newtext) {
+        free(*text);
+        *text = newtext;
+    } else {
+        freelist(&lines, linenum);
+        return *text;
+    }
+    strcpy(*text," ( ");
+    for (int i = 0; i < linenum; i++) if (*(lines[i])) {
+        sprintf(*text + strlen(*text), "%s%s", lines[i], " | ");
+    }
+    // turn the trailing " | " into " ) "
+    (*text)[strlen(*text) - 2] = ')';
+    freelist(&lines, linenum);
+    return *text;
+}
+
+ // Append s to the end of every line of dest and to the end of the text.
+ // dest is rewritten in place from a temporary copy, so the caller has to
+ // provide room for one extra copy of s per line.
+ void strlinecat(char * dest, const char * s)
+ {
+     char * const copy = mystrdup(dest);
+     const int slen = strlen(s);
+     char * out = dest;
+     for (const char * in = copy; *in; in++) {
+         if (*in == '\n') {
+             // s goes in front of the line break
+             memcpy(out, s, slen);
+             out += slen;
+         }
+         *out++ = *in;
+     }
+     strcpy(out, s);
+     free(copy);
+ }
+
// change \n to char c
-char * line_join(char * text, char c) {
+char * tr(char * text, char oldc, char newc) {
char * p;
- for (p = text; *p; p++) if (*p == '\n') *p = c;
+ for (p = text; *p; p++) if (*p == oldc) *p = newc;
return text;
}
-// leave only last {[^}]*} substring for handling zero morphemes
-char * delete_zeros(char * morphout) {
- char * p = morphout;
- char * q = p;
- char * q2 = NULL;
- int suffix = 0;
-
- for (;*p && *(p+1);) {
- switch (*p) {
- case '{':
- q2 = q;
- q--;
- break;
- case '}':
- if (q2) {
- suffix = 1;
- q--;
- }
- break;
- default:
- if (suffix) {
- q = q2;
- }
- suffix = 0;
- *q = *p;
+// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields
+// in the first line of the inputs
+// return 0, if inputs equal
+// return 1, if inputs may equal with a secondary suffix
+// (1 is also returned when either input is NULL)
+// otherwise return -1
+int morphcmp(const char * s, const char * t)
+{
+    // set to 1 once the compared field of s / t has ended; both start at 0
+    // so the final test never reads an indeterminate value when the
+    // comparison loop is not entered
+    int se = 0;
+    int te = 0;
+    // first line break of each input (NULL if there is none); const because
+    // they point into the const inputs
+    const char * sl;
+    const char * tl;
+    const char * olds;
+    const char * oldt;
+    if (!s || !t) return 1;
+    // first suffix field of s: derivational, else inflectional, else
+    // terminal; olds is cleared when the terminal-suffix search was used
+    olds = s;
+    sl = strchr(s, '\n');
+    s = strstr(s, MORPH_DERI_SFX);
+    if (!s || (sl && sl < s)) s = strstr(olds, MORPH_INFL_SFX);
+    if (!s || (sl && sl < s)) {
+        s = strstr(olds, MORPH_TERM_SFX);
+        olds = NULL;
+    }
+    // same search for t
+    oldt = t;
+    tl = strchr(t, '\n');
+    t = strstr(t, MORPH_DERI_SFX);
+    if (!t || (tl && tl < t)) t = strstr(oldt, MORPH_INFL_SFX);
+    if (!t || (tl && tl < t)) {
+        t = strstr(oldt, MORPH_TERM_SFX);
+        oldt = NULL;
+    }
+    // walk the suffix fields of both inputs in parallel while both are
+    // still inside their first line
+    while (s && t && (!sl || sl > s) && (!tl || tl > t)) {
+        s += MORPH_TAG_LEN;
+        t += MORPH_TAG_LEN;
+        se = 0;
+        te = 0;
+        // compare the field values up to the first separator
+        while ((*s == *t) && !se && !te) {
+            s++;
+            t++;
+            switch(*s) {
+                case ' ':
+                case '\n':
+                case '\t':
+                case '\0': se = 1;
+            }
+            switch(*t) {
+                case ' ':
+                case '\n':
+                case '\t':
+                case '\0': te = 1;
+            }
+        }
- p++;
- q++;
+        if (!se || !te) {
+            // not terminal suffix difference
+            if (olds) return -1;
+            return 1;
+        }
+        // advance both inputs to their next suffix field
+        olds = s;
+        s = strstr(s, MORPH_DERI_SFX);
+        if (!s || (sl && sl < s)) s = strstr(olds, MORPH_INFL_SFX);
+        if (!s || (sl && sl < s)) {
+            s = strstr(olds, MORPH_TERM_SFX);
+            olds = NULL;
+        }
+        oldt = t;
+        t = strstr(t, MORPH_DERI_SFX);
+        if (!t || (tl && tl < t)) t = strstr(oldt, MORPH_INFL_SFX);
+        if (!t || (tl && tl < t)) {
+            t = strstr(oldt, MORPH_TERM_SFX);
+            oldt = NULL;
+        }
+    }
+    if (!s && !t && se && te) return 0;
+    return 1;
+}
+
+// Count suffix fields in morph. Each step looks for the next
+// MORPH_DERI_SFX tag, falling back to MORPH_INFL_SFX and then to
+// MORPH_TERM_SFX, and continues one character behind the previous hit.
+// Returns 0 for a NULL or empty string.
+int get_sfxcount(const char * morph)
+{
+    if (!morph || !*morph) return 0;
+    int count = 0;
+    const char * hit = strstr(morph, MORPH_DERI_SFX);
+    if (!hit) hit = strstr(morph, MORPH_INFL_SFX);
+    if (!hit) hit = strstr(morph, MORPH_TERM_SFX);
+    while (hit) {
+        count++;
+        // resume the search just behind the start of the last match
+        const char * from = hit + 1;
+        hit = strstr(from, MORPH_DERI_SFX);
+        if (!hit) hit = strstr(from, MORPH_INFL_SFX);
+        if (!hit) hit = strstr(from, MORPH_TERM_SFX);
+    }
+    return count;
+}
+
+
+// Length of the field starting at r: the number of characters before the
+// first blank, tab, line break or end of string. A NULL r yields 0.
+int fieldlen(const char * r)
+{
+    if (!r) return 0;
- *q = '\0';
- return morphout;
+    return (int) strcspn(r, " \t\n");
+}
+
+// Copy the value of the first var field found in morph into dest.
+// The value starts MORPH_TAG_LEN characters after the start of the match and
+// ends at the first blank, tab, line break or end of string. dest has to be
+// large enough for the value and its terminating NUL; no bound is checked.
+// Returns dest, or NULL (dest untouched) if morph is NULL or var is not found.
+char * copy_field(char * dest, const char * morph, const char * var)
+{
+    if (!morph) return NULL;
+    // morph is const, so the match is kept const too instead of dropping
+    // the qualifier from the strstr() result
+    const char * beg = strstr(morph, var);
+    if (beg) {
+        char * d = dest;
+        for (beg += MORPH_TAG_LEN; *beg != ' ' && *beg != '\t' &&
+             *beg != '\n' && *beg != '\0'; d++, beg++) {
+            *d = *beg;
+        }
+        *d = '\0';
+        return dest;
+    }
+    return NULL;
+}
-#endif // END OF HUNSPELL_EXPERIMENTAL CODE
char * mystrrep(char * word, const char * pat, const char * rep) {
char * pos = strstr(word, pat);
@@ -452,6 +577,33 @@ char * mystrrep(char * word, const char * pat, const char * rep) {
u16_u8(word, MAXWORDUTF8LEN, w, l);
return 0;
}
+
+ // Remove duplicate strings from list[0..n-1]: every entry that equals an
+ // earlier, still present entry is freed, then the survivors are packed to
+ // the front in their original order.
+ // Returns the number of entries kept (n itself when n < 2).
+ int uniqlist(char ** list, int n) {
+     if (n < 2) return n;
+     // pass 1: free the repeats
+     for (int cur = 1; cur < n; cur++) {
+         if (!list[cur]) continue;
+         for (int prev = 0; prev < cur; prev++) {
+             if (list[prev] && (strcmp(list[prev], list[cur]) == 0)) {
+                 free(list[cur]);
+                 list[cur] = NULL;
+                 break;
+             }
+         }
+     }
+     // pass 2: close the gaps; slot 0 is never freed above and stays put
+     int kept = 1;
+     for (int cur = 1; cur < n; cur++) {
+         if (!list[cur]) continue;
+         list[kept] = list[cur];
+         kept++;
+     }
+     return kept;
+ }
+
+ // Free the n strings of *list (NULL entries are skipped), then the array
+ // itself, and reset *list to NULL. Nothing happens when list is NULL or
+ // n is not positive.
+ void freelist(char *** list, int n) {
+     if (!list || (n <= 0)) return;
+     char ** items = *list;
+     for (int k = 0; k < n; k++) {
+         if (items[k]) free(items[k]);
+     }
+     free(items);
+     *list = NULL;
+ }
// convert null terminated string to all caps
void mkallcap(char * p, const struct cs_info * csconv)
@@ -5319,7 +5471,7 @@ int parse_string(char * line, char ** out, const char * warnvar)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
diff --git a/src/myspell/csutil.hxx b/src/myspell/csutil.hxx
index e1ba94e..df7979b 100644
--- a/src/myspell/csutil.hxx
+++ b/src/myspell/csutil.hxx
@@ -9,17 +9,32 @@
#define HUHCAP 3
#define HUHINITCAP 4
-#define FIELD_STEM "st:"
-#define FIELD_POS "po:"
-#define FIELD_SUFF "su:"
-#define FIELD_PREF "pr:"
-#define FIELD_FREQ "fr:"
-#define FIELD_PHON "ph:"
-#define FIELD_HYPH "hy:"
-#define FIELD_COMP "co:"
+#define MORPH_STEM "st:"
+#define MORPH_ALLOMORPH "al:"
+#define MORPH_POS "po:"
+#define MORPH_DERI_PFX "dp:"
+#define MORPH_INFL_PFX "ip:"
+#define MORPH_TERM_PFX "tp:"
+#define MORPH_DERI_SFX "ds:"
+#define MORPH_INFL_SFX "is:"
+#define MORPH_TERM_SFX "ts:"
+#define MORPH_SURF_PFX "sp:"
+#define MORPH_FREQ "fr:"
+#define MORPH_PHON "ph:"
+#define MORPH_HYPH "hy:"
+#define MORPH_PART "pa:"
+#define MORPH_HENTRY "_H:"
+#define MORPH_TAG_LEN strlen(MORPH_STEM)
+
+#define MSEP_FLD ' '
+#define MSEP_REC '\n'
+#define MSEP_ALT '\v'
+
// default flags
-#define ONLYUPCASEFLAG 65535
+#define DEFAULTFLAGS 65510
+#define FORBIDDENWORD 65510
+#define ONLYUPCASEFLAG 65511
typedef struct {
unsigned char l;
@@ -61,16 +76,14 @@ char * mystrrep(char *, const char *, const char *);
void strlinecat(char * lines, const char * s);
// tokenize into lines with new line
- int line_tok(const char * text, char *** lines);
+ int line_tok(const char * text, char *** lines, char breakchar);
// tokenize into lines with new line and uniq in place
- char * line_uniq(char * text);
-
-// change \n to c in place
- char * line_join(char * text, char c);
+ char * line_uniq(char * text, char breakchar);
+ char * line_uniq_app(char ** text, char breakchar);
-// leave only last {[^}]*} pattern in string
- char * delete_zeros(char * morphout);
+// change oldchar to newchar in place
+ char * tr(char * text, char oldc, char newc);
// reverse word
int reverseword(char *);
@@ -78,6 +91,12 @@ void strlinecat(char * lines, const char * s);
// reverse word
int reverseword_utf(char *);
+// remove duplicates
+ int uniqlist(char ** list, int n);
+
+// free character array list
+ void freelist(char *** list, int n);
+
// character encoding information
struct cs_info {
unsigned char ccase;
@@ -174,4 +193,11 @@ int parse_string(char * line, char ** out, const char * name);
int parse_array(char * line, char ** out,
unsigned short ** out_utf16, int * out_utf16_len, const char * name, int utf8);
+int fieldlen(const char * r);
+char * copy_field(char * dest, const char * morph, const char * var);
+
+int morphcmp(const char * s, const char * t);
+
+int get_sfxcount(const char * morph);
+
#endif
diff --git a/src/myspell/hashmgr.cxx b/src/myspell/hashmgr.cxx
index 656fb85..08e061c 100644
--- a/src/myspell/hashmgr.cxx
+++ b/src/myspell/hashmgr.cxx
@@ -47,7 +47,7 @@ HashMgr::HashMgr(const char * tpath, const char * apath)
aliasf = NULL;
numaliasm = 0;
aliasm = NULL;
- forbiddenword = FLAG_NULL; // forbidden word signing flag
+ forbiddenword = FORBIDDENWORD; // forbidden word signing flag
load_config(apath);
int ec = load_tables(tpath);
if (ec) {
@@ -70,20 +70,9 @@ HashMgr::~HashMgr()
for (int i=0; i < tablesize; i++) {
struct hentry * pt = tableptr[i];
struct hentry * nt = NULL;
-/* if (pt) {
- if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr);
-#ifdef HUNSPELL_EXPERIMENTAL
- if (pt->description && !aliasm) free(pt->description);
-#endif
- pt = pt->next;
- }
-*/
while(pt) {
nt = pt->next;
if (pt->astr && (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) free(pt->astr);
-#ifdef HUNSPELL_EXPERIMENTAL
- if (pt->description && !aliasm) free(pt->description);
-#endif
free(pt);
pt = nt;
}
@@ -140,20 +129,13 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
int al, const char * desc, bool onlyupcase)
{
bool upcasehomonym = false;
- int descl = (desc) ? strlen(desc) : 0;
+ int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0;
// variable-length hash record with word and optional fields
- // instead of mmap implementation temporarily
struct hentry* hp =
- (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl + 1);
+ (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
if (!hp) return 1;
char * hpw = &(hp->word);
strcpy(hpw, word);
- if (desc && strncmp(desc, FIELD_PHON, strlen(FIELD_PHON)) == 0) {
- strcpy(hpw + wbl + 1, desc + strlen(FIELD_PHON));
- hp->var = 1;
- } else {
- hp->var = 0;
- }
if (ignorechars != NULL) {
if (utf8) {
remove_ignored_chars_utf(hpw, ignorechars_utf16, ignorechars_utf16_len);
@@ -167,29 +149,29 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
int i = hash(hpw);
- hp->blen = (unsigned char) wbl;
- hp->clen = (unsigned char) wcl;
- hp->alen = (short) al;
- hp->astr = aff;
- hp->next = NULL;
- hp->next_homonym = NULL;
-#ifdef HUNSPELL_EXPERIMENTAL
- if (aliasm) {
- hp->description = (desc) ? get_aliasm(atoi(desc)) : mystrdup(desc);
- } else {
- hp->description = mystrdup(desc);
- if (desc && !hp->description)
- {
- free(hp->astr);
- free(hp);
- return 1;
- }
- if (hp->description && complexprefixes) {
- if (utf8) reverseword_utf(hp->description); else reverseword(hp->description);
+ hp->blen = (unsigned char) wbl;
+ hp->clen = (unsigned char) wcl;
+ hp->alen = (short) al;
+ hp->astr = aff;
+ hp->next = NULL;
+ hp->next_homonym = NULL;
+
+ // store the description string or its pointer
+ if (desc) {
+ hp->var = H_OPT;
+ if (aliasm) {
+ hp->var += H_OPT_ALIASM;
+ *((char **) (hpw + wbl + 1)) = get_aliasm(atoi(desc));
+ } else {
+ strcpy(hpw + wbl + 1, desc);
+ if (complexprefixes) {
+ if (utf8) reverseword_utf(HENTRY_DATA(hp));
+ else reverseword(HENTRY_DATA(hp));
}
- }
-#endif
-
+ }
+ if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;
+ } else hp->var = 0;
+
struct hentry * dp = tableptr[i];
if (!dp) {
tableptr[i] = hp;
@@ -284,8 +266,31 @@ int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) {
return len;
}
+// remove word with FORBIDDENWORD flag (not implemented)
+// Stub: the entry for word is looked up but not used, the intended body
+// below is commented out, and 1 is returned unconditionally.
+int HashMgr::remove(const char * word)
+{
+ struct hentry * dp = lookup(word);
+/*
+ if (!word || (!dp->astr || !TESTAFF(dp->astr, forbiddenword, pt->alen))) {
+ int wbl = strlen(word);
+ int wcl = get_clen_and_captype(word, wbl, &captype);
+ if (aliasf) {
+ add_word(word, wbl, wcl, dp->astr, dp->alen, NULL, false);
+ } else {
+ unsigned short * flags = (unsigned short *) malloc (dp->alen * sizeof(short));
+ if (flags) {
+ memcpy((void *) flags, (void *) dp->astr, dp->alen * sizeof(short));
+ add_word(word, wbl, wcl, flags, dp->alen, NULL, false);
+ } else return 1;
+ }
+ return add_hidden_capitalized_word((char *) word, wbl, wcl, dp->astr, dp->alen, NULL, captype);
+ }
+*/
+ return 1;
+}
+
// add a custom dic. word to the hash table (public)
-int HashMgr::put_word(const char * word, char * aff)
+int HashMgr::add(const char * word, char * aff)
{
unsigned short * flags;
int al = 0;
@@ -303,10 +308,10 @@ int HashMgr::put_word(const char * word, char * aff)
return add_hidden_capitalized_word((char *) word, wbl, wcl, flags, al, NULL, captype);
}
-int HashMgr::put_word_pattern(const char * word, const char * pattern)
+int HashMgr::add_with_affix(const char * word, const char * example)
{
// detect captype and modify word length for UTF-8 encoding
- struct hentry * dp = lookup(pattern);
+ struct hentry * dp = lookup(example);
if (dp && dp->astr) {
int captype;
int wbl = strlen(word);
@@ -389,6 +394,8 @@ int HashMgr::load_tables(const char * tpath)
mychomp(ts);
// split each line into word and morphological description
dp = strchr(ts,'\t');
+ char * dp2 = strchr(ts,' ');
+ if (dp2 && (!dp || (dp2 < dp))) dp = dp2;
if (dp) {
*dp = '\0';
@@ -644,16 +651,15 @@ int HashMgr::load_config(const char * affpath)
}
}
-#ifdef HUNSPELL_EXPERIMENTAL
if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) {
if (parse_aliasm(line, afflst)) {
fclose(afflst);
return 1;
}
}
-#endif
- if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
- if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
+
+ if (strncmp(line,"COMPLEXPREFIXES",15) == 0) complexprefixes = 1;
+ if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break;
}
if (csconv == NULL) csconv = get_current_cs("ISO8859-1");
fclose(afflst);
@@ -683,7 +689,7 @@ int HashMgr::parse_aliasf(char * line, FILE * af)
aliasf = NULL;
aliasflen = NULL;
HUNSPELL_WARNING(stderr, "incorrect number of entries in AF table\n");
- free(piece);
+ // free(piece);
return 1;
}
aliasf = (unsigned short **) malloc(numaliasf * sizeof(unsigned short *));
@@ -703,7 +709,7 @@ int HashMgr::parse_aliasf(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
@@ -737,7 +743,7 @@ int HashMgr::parse_aliasf(char * line, FILE * af)
aliasf = NULL;
aliasflen = NULL;
HUNSPELL_WARNING(stderr, "error: AF table is corrupt\n");
- free(piece);
+ // free(piece);
return 1;
}
break;
@@ -751,7 +757,7 @@ int HashMgr::parse_aliasf(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (!aliasf[j]) {
@@ -781,7 +787,6 @@ int HashMgr::get_aliasf(int index, unsigned short ** fvec) {
return 0;
}
-#ifdef HUNSPELL_EXPERIMENTAL
/* parse morph alias definitions */
int HashMgr::parse_aliasm(char * line, FILE * af)
{
@@ -802,7 +807,7 @@ int HashMgr::parse_aliasm(char * line, FILE * af)
numaliasm = atoi(piece);
if (numaliasm < 1) {
HUNSPELL_WARNING(stderr, "incorrect number of entries in AM table\n");
- free(piece);
+ // free(piece);
return 1;
}
aliasm = (char **) malloc(numaliasm * sizeof(char *));
@@ -817,7 +822,7 @@ int HashMgr::parse_aliasm(char * line, FILE * af)
}
i++;
}
- free(piece);
+ // free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
@@ -836,14 +841,14 @@ int HashMgr::parse_aliasm(char * line, FILE * af)
tp = nl;
i = 0;
aliasm[j] = NULL;
- piece = mystrsep(&tp, 0);
+ piece = mystrsep(&tp, ' ');
while (piece) {
if (*piece != '\0') {
switch(i) {
case 0: {
if (strncmp(piece,"AM",2) != 0) {
HUNSPELL_WARNING(stderr, "error: AM table is corrupt\n");
- free(piece);
+ // free(piece);
numaliasm = 0;
free(aliasm);
aliasm = NULL;
@@ -852,6 +857,11 @@ int HashMgr::parse_aliasm(char * line, FILE * af)
break;
}
case 1: {
+ // add the remaining of the line
+ if (*tp) {
+ *(tp - 1) = ' ';
+ tp = tp + strlen(tp);
+ }
if (complexprefixes) {
if (utf8) reverseword_utf(piece);
else reverseword(piece);
@@ -862,8 +872,8 @@ int HashMgr::parse_aliasm(char * line, FILE * af)
}
i++;
}
- free(piece);
- piece = mystrsep(&tp, 0);
+ // free(piece);
+ piece = mystrsep(&tp, ' ');
}
if (!aliasm[j]) {
numaliasm = 0;
@@ -885,4 +895,3 @@ char * HashMgr::get_aliasm(int index) {
HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
return NULL;
}
-#endif
diff --git a/src/myspell/hashmgr.hxx b/src/myspell/hashmgr.hxx
index cf5148f..d88de48 100644
--- a/src/myspell/hashmgr.hxx
+++ b/src/myspell/hashmgr.hxx
@@ -42,18 +42,16 @@ public:
int hash(const char *) const;
struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
- int put_word(const char * word, char * ap);
- int put_word_pattern(const char * word, const char * pattern);
+ int add(const char * word, char * aff);
+ int add_with_affix(const char * word, const char * pattern);
+ int remove(const char * word);
int decode_flags(unsigned short ** result, char * flags);
unsigned short decode_flag(const char * flag);
char * encode_flag(unsigned short flag);
int is_aliasf();
int get_aliasf(int index, unsigned short ** fvec);
-#ifdef HUNSPELL_EXPERIMENTAL
int is_aliasm();
char * get_aliasm(int index);
-#endif
-
private:
int get_clen_and_captype(const char * word, int wbl, int * captype);
@@ -64,9 +62,7 @@ private:
int parse_aliasf(char * line, FILE * af);
int add_hidden_capitalized_word(char * word, int wbl, int wcl,
unsigned short * flags, int al, char * dp, int captype);
-#ifdef HUNSPELL_EXPERIMENTAL
int parse_aliasm(char * line, FILE * af);
-#endif
};
diff --git a/src/myspell/htypes.hxx b/src/myspell/htypes.hxx
index ea43730..bc078c3 100644
--- a/src/myspell/htypes.hxx
+++ b/src/myspell/htypes.hxx
@@ -8,6 +8,16 @@
#define ROTATE(v,q) \
(v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
+// hentry options
+#define H_OPT (1 << 0)
+#define H_OPT_ALIASM (1 << 1)
+#define H_OPT_PHON (1 << 2)
+
+#define HENTRY_WORD(h) &(h->word)
+#define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
+ *((char **) (&(h->word) + h->blen + 1)) : &(h->word) + h->blen + 1) : NULL)
+#define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL)
+
// approx. number of user defined words
#define USERWORD 1000
@@ -19,9 +29,6 @@ struct hentry
unsigned short * astr; // affix flag vector
struct hentry * next; // next word with same hash code
struct hentry * next_homonym; // next homonym word (with same hash code)
-#ifdef HUNSPELL_EXPERIMENTAL
- char * description; // morphological data (optional)
-#endif
char var; // variable fields (only for special pronounciation yet)
char word; // variable-length word (8-bit or UTF-8 encoding)
};
diff --git a/src/myspell/hunspell.cxx b/src/myspell/hunspell.cxx
index cd31a64..3f114c9 100644
--- a/src/myspell/hunspell.cxx
+++ b/src/myspell/hunspell.cxx
@@ -14,6 +14,10 @@
#include "hunspell.hxx"
#include "hunspell.h"
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#ifndef MOZILLA_CLIENT
#ifndef W32
using namespace std;
@@ -59,7 +63,7 @@ Hunspell::~Hunspell()
pAMgr = NULL;
pHMgr = NULL;
#ifdef MOZILLA_CLIENT
- delete csconv;
+ free(csconv);
#endif
csconv= NULL;
if (encoding) free(encoding);
@@ -117,7 +121,6 @@ int Hunspell::cleanword2(char * dest, const char * src,
return nl;
}
-#ifdef HUNSPELL_EXPERIMENTAL
int Hunspell::cleanword(char * dest, const char * src,
int * pcaptype, int * pabbrev)
{
@@ -190,7 +193,6 @@ int Hunspell::cleanword(char * dest, const char * src,
}
return strlen(dest);
}
-#endif
void Hunspell::mkallcap(char * p)
{
@@ -602,9 +604,9 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
return NULL;
}
- // he = next not pseudoroot, onlyincompound homonym or onlyupcase word
+ // he = next not needaffix, onlyincompound homonym or onlyupcase word
while (he && (he->astr) &&
- ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) ||
+ ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
(pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
(info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
)) he = he->next_homonym;
@@ -1025,95 +1027,78 @@ int Hunspell::suggest_auto(char*** slst, const char * word)
// END OF LANG_hu section
return ns;
}
+#endif
-// XXX need UTF-8 support
-int Hunspell::stem(char*** slst, const char * word)
+// Build the stem list for n morphological descriptions (desc[0..n-1]).
+// For each description the compound parts (MORPH_PART fields) except the
+// last one are copied in front of the result, and the remainder is split at
+// " | " into alternatives. An alternative with a derivational suffix field
+// is cut at its inflectional suffix field and passed to suggest_gen(); for
+// any other alternative the surface prefix and stem fields are copied.
+// The collected entries are tokenized into *slst and duplicates are removed.
+// Returns the number of entries in *slst; 0 (with *slst untouched) if n is 0.
+int Hunspell::stem(char*** slst, char ** desc, int n)
{
- char cw[MAXWORDUTF8LEN];
- char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
- int wl = strlen(word);
- if (utf8) {
- if (wl >= MAXWORDUTF8LEN) return 0;
- } else {
- if (wl >= MAXWORDLEN) return 0;
- }
- int captype = 0;
- int abbv = 0;
- wl = cleanword(cw, word, &captype, &abbv);
- if (wl == 0) return 0;
-
- int ns = 0;
-
- *slst = NULL; // HU, nsug in pSMgr->suggest
-
- switch(captype) {
- case HUHCAP:
- case NOCAP: {
- ns = pSMgr->suggest_stems(slst, cw, ns);
-
- if ((abbv) && (ns == 0)) {
- memcpy(wspace,cw,wl);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- ns = pSMgr->suggest_stems(slst, wspace, ns);
- }
-
- break;
- }
-
- case INITCAP: {
-
- ns = pSMgr->suggest_stems(slst, cw, ns);
-
- if (ns == 0) {
- memcpy(wspace,cw,(wl+1));
- mkallsmall(wspace);
- ns = pSMgr->suggest_stems(slst, wspace, ns);
-
- }
-
- if ((abbv) && (ns == 0)) {
- memcpy(wspace,cw,wl);
- mkallsmall(wspace);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- ns = pSMgr->suggest_stems(slst, wspace, ns);
- }
-
- break;
-
- }
-
- case ALLCAP: {
- ns = pSMgr->suggest_stems(slst, cw, ns);
- if (ns != 0) break;
-
- memcpy(wspace,cw,(wl+1));
- mkallsmall(wspace);
- ns = pSMgr->suggest_stems(slst, wspace, ns);
-
- if (ns == 0) {
- mkinitcap(wspace);
- ns = pSMgr->suggest_stems(slst, wspace, ns);
- }
-
- if ((abbv) && (ns == 0)) {
- memcpy(wspace,cw,wl);
- mkallsmall(wspace);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- ns = pSMgr->suggest_stems(slst, wspace, ns);
- }
-
+ char result[MAXLNLEN];
+ char result2[MAXLNLEN];
+ if (n == 0) return 0;
+ *result2 = '\0';
+ for (int i = 0; i < n; i++) {
+ *result = '\0';
+ // add compound word parts (except the last one)
+ char * s = (char *) desc[i];
+ char * part = strstr(s, MORPH_PART);
+ if (part) {
+ char * nextpart = strstr(part + 1, MORPH_PART);
+ while (nextpart) {
+ copy_field(result + strlen(result), part, MORPH_PART);
+ part = nextpart;
+ nextpart = strstr(part + 1, MORPH_PART);
+ }
+ s = part;
+ }
- break;
- }
+ char **pl;
+ char tok[MAXLNLEN];
+ strcpy(tok, s);
+ // mark every " | " so that line_tok() can split the alternatives
+ char * alt = strstr(tok, " | ");
+ while (alt) {
+ alt[1] = MSEP_ALT;
+ alt = strstr(alt, " | ");
+ }
+ int pln = line_tok(tok, &pl, MSEP_ALT);
+ for (int i = 0; i < pln; i++) {
+ // add derivational suffixes
+ if (strstr(pl[i], MORPH_DERI_SFX)) {
+ // remove inflectional suffixes
+ char * is = strstr(pl[i], MORPH_INFL_SFX);
+ if (is) *is = '\0';
+ char * sg = pSMgr->suggest_gen(&(pl[i]), 1, pl[i]);
+ if (sg) {
+ char ** gen;
+ int genl = line_tok(sg, &gen, MSEP_REC);
+ free(sg);
+ for (int j = 0; j < genl; j++) {
+ sprintf(result2 + strlen(result2), "%c%s%s",
+ MSEP_REC, result, gen[j]);
+ }
+ freelist(&gen, genl);
+ }
+ } else {
+ sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
+ if (strstr(pl[i], MORPH_SURF_PFX)) {
+ copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX);
+ }
+ copy_field(result2 + strlen(result2), pl[i], MORPH_STEM);
+ }
+ }
+ freelist(&pl, pln);
}
-
- return ns;
+ // the order in which function arguments are evaluated is unspecified, so
+ // line_tok() has to fill *slst before *slst is read for uniqlist()
+ int sln = line_tok(result2, slst, MSEP_REC);
+ return uniqlist(*slst, sln);
}
+// Stem a single word: analyze() it, hand the analyses to the list-based
+// stem() overload, then release the intermediate analysis list.
+// Returns whatever the list-based overload returned.
+int Hunspell::stem(char*** slst, const char * word)
+{
+    char ** analyses;
+    const int nanalyses = analyze(&analyses, word);
+    const int nstems = stem(slst, analyses, nanalyses);
+    freelist(&analyses, nanalyses);
+    return nstems;
+}
+
+#ifdef HUNSPELL_EXPERIMENTAL
int Hunspell::suggest_pos_stems(char*** slst, const char * word)
{
char cw[MAXWORDUTF8LEN];
@@ -1236,15 +1221,23 @@ int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
return nc;
}
-int Hunspell::put_word(const char * word)
+int Hunspell::add(const char * word) // add word to the run-time dictionary
{
- if (pHMgr) return pHMgr->put_word(word, NULL);
+ if (pHMgr) return pHMgr->add(word, NULL); // forward to the hash manager when one is set; otherwise fall through and return 0
return 0;
}
-int Hunspell::put_word_pattern(const char * word, const char * pattern)
+int Hunspell::add_with_affix(const char * word, const char * example) // add word to the run-time dictionary using example as the affix model
{
- if (pHMgr) return pHMgr->put_word_pattern(word, pattern);
+ if (pHMgr) return pHMgr->add_with_affix(word, example); // forward to the hash manager when one is set
+ return 0; // no hash manager available
+}
+
+/* XXX not implemented yet */
+
+int Hunspell::remove(const char * word) // remove word from the run-time dictionary
+{
+ if (pHMgr) return pHMgr->remove(word); // forward to the hash manager when one is set; otherwise fall through and return 0
return 0;
}
@@ -1258,9 +1251,16 @@ struct cs_info * Hunspell::get_csconv()
return csconv;
}
-#ifdef HUNSPELL_EXPERIMENTAL
-// XXX need UTF-8 support
-char * Hunspell::morph(const char * word)
+// Append the string st to result, inserting a "\n" separator first when
+// result is already non-empty, then free st. A NULL st leaves result
+// untouched. Always returns result, so the declared char* return value is
+// well defined on every path.
+char * Hunspell::cat_result(char * result, char * st)
+{
+  if (st) {
+    if (*result) strcat(result, "\n");
+    strcat(result, st);
+    free(st);
+  }
+  return result;
+}
+
+int Hunspell::analyze(char*** slst, const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
@@ -1305,156 +1305,77 @@ char * Hunspell::morph(const char * word)
}
}
- if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return NULL;
+ if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
strcat(result, cw);
result[n - 1] = '\0';
- if (n == wl) {
- st = pSMgr->suggest_morph(cw + n - 1);
- if (st) {
- strcat(result, st);
- free(st);
- }
- } else {
+ if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
+ else {
char sign = cw[n];
cw[n] = '\0';
- st = pSMgr->suggest_morph(cw + n - 1);
- if (st) {
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(cw + n - 1));
strcat(result, "+"); // XXX SPEC. MORPHCODE
cw[n] = sign;
- st = pSMgr->suggest_morph(cw + n);
- if (st) {
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(cw + n));
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
}
}
// END OF LANG_hu section
switch(captype) {
- case NOCAP: {
- st = pSMgr->suggest_morph(cw);
- if (st) {
- strcat(result, st);
- free(st);
- }
- if (abbv) {
- memcpy(wspace,cw,wl);
- *(wspace+wl) = '.';
- *(wspace+wl+1) = '\0';
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- }
- break;
- }
+ case NOCAP: {
+ cat_result(result, pSMgr->suggest_morph(cw));
+ if (abbv) {
+ memcpy(wspace,cw,wl);
+ *(wspace+wl) = '.';
+ *(wspace+wl+1) = '\0';
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ }
+ break;
+ }
case INITCAP: {
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- strcat(result, st);
- free(st);
- }
- st = pSMgr->suggest_morph(cw);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- if (abbv) {
- memcpy(wspace,cw,wl);
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ cat_result(result, pSMgr->suggest_morph(cw));
+ if (abbv) {
+ memcpy(wspace,cw,wl);
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(wspace));
mkinitcap(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(wspace));
}
break;
}
case HUHCAP: {
- st = pSMgr->suggest_morph(cw);
- if (st) {
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(cw));
#if 0
memcpy(wspace,cw,(wl+1));
mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(wspace));
#endif
break;
}
case ALLCAP: {
memcpy(wspace,cw,(wl+1));
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(wspace));
mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- mkinitcap(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- if (abbv) {
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ mkinitcap(wspace);
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ if (abbv) {
memcpy(wspace,cw,(wl+1));
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
- if (*result) strcat(result, "\n");
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- strcat(result, st);
- free(st);
- }
+ cat_result(result, pSMgr->suggest_morph(wspace));
mkallsmall(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- mkinitcap(wspace);
- st = pSMgr->suggest_morph(wspace);
- if (st) {
- if (*result) strcat(result, "\n");
- strcat(result, st);
- free(st);
- }
- }
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ mkinitcap(wspace);
+ cat_result(result, pSMgr->suggest_morph(wspace));
+ }
break;
}
}
@@ -1464,7 +1385,8 @@ char * Hunspell::morph(const char * word)
if (complexprefixes) {
if (utf8) reverseword_utf(result); else reverseword(result);
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
+
}
// compound word with dash (HU) I18n
@@ -1476,7 +1398,7 @@ char * Hunspell::morph(const char * word)
*dash='\0';
// examine 2 sides of the dash
if (dash[1] == '\0') { // base word ending with dash
- if (spell(cw)) return pSMgr->suggest_morph(cw);
+ if (spell(cw)) return line_tok(pSMgr->suggest_morph(cw), slst, MSEP_REC);
} else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
if (spell(cw) && (spell("-e"))) {
st = pSMgr->suggest_morph(cw);
@@ -1490,7 +1412,7 @@ char * Hunspell::morph(const char * word)
strcat(result, st);
free(st);
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
}
} else {
// first word ending with dash: word- XXX ???
@@ -1502,18 +1424,18 @@ char * Hunspell::morph(const char * word)
dash[0]='\0';
if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
((dash[1] > '0') && (dash[1] < '9')))) {
- st = morph(cw);
+ st = pSMgr->suggest_morph(cw);
if (st) {
strcat(result, st);
free(st);
strcat(result,"+"); // XXX spec. separator in MORPHCODE
}
- st = morph(dash+1);
+ st = pSMgr->suggest_morph(dash+1);
if (st) {
strcat(result, st);
free(st);
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
}
}
// affixed number in correct word
@@ -1539,30 +1461,89 @@ char * Hunspell::morph(const char * word)
strcat(result, st);
free(st);
}
- return mystrdup(result);
+ return line_tok(result, slst, MSEP_REC);
}
}
}
}
- return NULL;
+ return 0;
}
+// Morphological generation by description(s): analyze `word`, then call
+// pSMgr->suggest_gen() once for each of the pln descriptions in pl and
+// collect the results. The generated forms are re-cased according to the
+// capitalization type of `word`, and forms rejected by spell() are dropped.
+// On return *slst holds the accepted forms (NULL when there are none) and
+// the return value is their count.
+int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
+{
+  // give the out-parameter a defined value on every early-return path
+  *slst = NULL;
+  if (!pSMgr || !pln) return 0;
+  // analyze() has paths that return 0 without storing a list
+  char **pl2 = NULL;
+  // analyze() returns int; keeping the count in a char would truncate it
+  int pl2n = analyze(&pl2, word);
+  int captype = 0;
+  int abbv = 0;
+  char cw[MAXWORDUTF8LEN];
+  cleanword(cw, word, &captype, &abbv);
+  char result[MAXLNLEN];
+  *result = '\0';
+
+  for (int i = 0; i < pln; i++) {
+    cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
+  }
+  freelist(&pl2, pl2n);
+
+  if (*result) {
+    // allcap
+    if (captype == ALLCAP) mkallcap(result);
+
+    // line split
+    int linenum = line_tok(result, slst, MSEP_REC);
+
+    // capitalize
+    if (captype == INITCAP || captype == HUHINITCAP) {
+      for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
+    }
+
+    // temporary filtering of prefix related errors (eg.
+    // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
+
+    // compact the accepted entries to the front of the list; r counts them
+    int r = 0;
+    for (int j=0; j < linenum; j++) {
+      if (!spell((*slst)[j])) {
+        free((*slst)[j]);
+        (*slst)[j] = NULL;
+      } else {
+        if (r < j) (*slst)[r] = (*slst)[j];
+        r++;
+      }
+    }
+    if (r > 0) return r;
+    // nothing survived the filter: release the (now empty) list itself
+    free(*slst);
+    *slst = NULL;
+  }
+  return 0;
+}
+
+// Morphological generation by example: analyze `pattern`, generate the
+// matching form(s) of `word` from those analysis lines, free the analysis
+// list and pass the result through uniqlist() (presumably duplicate
+// removal - confirm against csutil). Returns the count uniqlist() reports.
+int Hunspell::generate(char*** slst, const char * word, const char * pattern)
+{
+  char **pl;
+  // analyze() returns int; a char here would truncate (or turn negative)
+  // for long analysis lists and then be passed on to generate()/freelist()
+  int pln = analyze(&pl, pattern);
+  int n = generate(slst, word, pl, pln);
+  freelist(&pl, pln);
+  return uniqlist(*slst, n);
+}
+
+#ifdef HUNSPELL_EXPERIMENTAL
// XXX need UTF-8 support
char * Hunspell::morph_with_correction(const char * word)
{
char cw[MAXWORDUTF8LEN];
char wspace[MAXWORDUTF8LEN];
- if (! pSMgr) return 0;
+ if (! pSMgr) return NULL;
int wl = strlen(word);
if (utf8) {
- if (wl >= MAXWORDUTF8LEN) return 0;
+ if (wl >= MAXWORDUTF8LEN) return NULL;
} else {
- if (wl >= MAXWORDLEN) return 0;
+ if (wl >= MAXWORDLEN) return NULL;
}
int captype = 0;
int abbv = 0;
wl = cleanword(cw, word, &captype, &abbv);
- if (wl == 0) return 0;
+ if (wl == 0) return NULL;
char result[MAXLNLEN];
char * st = NULL;
@@ -1577,8 +1558,8 @@ char * Hunspell::morph_with_correction(const char * word)
strcat(result, st);
free(st);
}
- if (abbv) {
- memcpy(wspace,cw,wl);
+ if (abbv) {
+ memcpy(wspace,cw,wl);
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
st = pSMgr->suggest_morph_for_spelling_error(wspace);
@@ -1598,14 +1579,14 @@ char * Hunspell::morph_with_correction(const char * word)
strcat(result, st);
free(st);
}
- st = pSMgr->suggest_morph_for_spelling_error(cw);
+ st = pSMgr->suggest_morph_for_spelling_error(cw);
if (st) {
if (*result) strcat(result, "\n");
strcat(result, st);
free(st);
}
- if (abbv) {
- memcpy(wspace,cw,wl);
+ if (abbv) {
+ memcpy(wspace,cw,wl);
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
mkallsmall(wspace);
@@ -1614,14 +1595,14 @@ char * Hunspell::morph_with_correction(const char * word)
if (*result) strcat(result, "\n");
strcat(result, st);
free(st);
- }
+ }
mkinitcap(wspace);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
if (*result) strcat(result, "\n");
strcat(result, st);
free(st);
- }
+ }
}
break;
}
@@ -1655,22 +1636,22 @@ char * Hunspell::morph_with_correction(const char * word)
strcat(result, st);
free(st);
}
- mkinitcap(wspace);
- st = pSMgr->suggest_morph_for_spelling_error(wspace);
+ mkinitcap(wspace);
+ st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
if (*result) strcat(result, "\n");
strcat(result, st);
free(st);
}
- if (abbv) {
+ if (abbv) {
memcpy(wspace,cw,(wl+1));
*(wspace+wl) = '.';
*(wspace+wl+1) = '\0';
if (*result) strcat(result, "\n");
st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
- strcat(result, st);
- free(st);
+ strcat(result, st);
+ free(st);
}
mkallsmall(wspace);
st = pSMgr->suggest_morph_for_spelling_error(wspace);
@@ -1679,14 +1660,14 @@ char * Hunspell::morph_with_correction(const char * word)
strcat(result, st);
free(st);
}
- mkinitcap(wspace);
- st = pSMgr->suggest_morph_for_spelling_error(wspace);
+ mkinitcap(wspace);
+ st = pSMgr->suggest_morph_for_spelling_error(wspace);
if (st) {
if (*result) strcat(result, "\n");
strcat(result, st);
free(st);
}
- }
+ }
break;
}
}
@@ -1695,37 +1676,6 @@ char * Hunspell::morph_with_correction(const char * word)
return NULL;
}
-/* analyze word
- * return line count
- * XXX need a better data structure for morphological analysis */
-int Hunspell::analyze(char ***out, const char *word) {
- int n = 0;
- if (!word) return 0;
- char * m = morph(word);
- if(!m) return 0;
- if (!out)
- {
- n = line_tok(m, out);
- free(m);
- return n;
- }
-
- // without memory allocation
- /* BUG missing buffer size checking */
- int i, p;
- for(p = 0, i = 0; m[i]; i++) {
- if(m[i] == '\n' || !m[i+1]) {
- n++;
- strncpy((*out)[n++], m + p, i - p + 1);
- if (m[i] == '\n') (*out)[n++][i - p] = '\0';
- if(!m[i+1]) break;
- p = i + 1;
- }
- }
- free(m);
- return n;
-}
-
#endif // END OF HUNSPELL_EXPERIMENTAL CODE
Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
@@ -1753,3 +1703,54 @@ int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
return ((Hunspell*)pHunspell)->suggest(slst, word);
}
+// C API wrapper: forwards to Hunspell::analyze() on the object behind the handle.
+int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
+{
+  Hunspell * checker = (Hunspell*)pHunspell;
+  return checker->analyze(slst, word);
+}
+
+// C API wrapper: forwards to Hunspell::stem(slst, word) on the object behind the handle.
+int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
+{
+  Hunspell * checker = (Hunspell*)pHunspell;
+  return checker->stem(slst, word);
+}
+
+// C API wrapper: get stems from n morphological analysis lines in desc by
+// forwarding to Hunspell::stem(slst, desc, n). Named Hunspell_stem2 to match
+// the declaration in hunspell.h; functions with C linkage cannot be
+// overloaded, so a second Hunspell_stem would not provide that symbol.
+int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
+{
+  return ((Hunspell*)pHunspell)->stem(slst, desc, n);
+}
+
+// C API wrapper: generation by example word; forwards to
+// Hunspell::generate(slst, word, word2) on the object behind the handle.
+int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
+    const char * word2)
+{
+  Hunspell * checker = (Hunspell*)pHunspell;
+  return checker->generate(slst, word, word2);
+}
+
+// C API wrapper: generation from n morphological descriptions in desc by
+// forwarding to Hunspell::generate(slst, word, desc, n). Named
+// Hunspell_generate2 to match the declaration in hunspell.h; functions with
+// C linkage cannot be overloaded.
+int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
+    char** desc, int n)
+{
+  return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
+}
+
+ /* functions for run-time modification of the dictionary */
+
+ /* add word to the run-time dictionary */
+
+int Hunspell_add(Hunhandle *pHunspell, const char * word)
+{
+  // forward to Hunspell::add() on the object behind the handle
+  Hunspell * checker = (Hunspell*)pHunspell;
+  return checker->add(word);
+}
+
+ /* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
+ */
+
+int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
+    const char * example)
+{
+  // forward to Hunspell::add_with_affix() on the object behind the handle
+  Hunspell * checker = (Hunspell*)pHunspell;
+  return checker->add_with_affix(word, example);
+}
+
+ /* remove word from the run-time dictionary */
+ /* NOTE: not implemented yet */
+
+int Hunspell_remove(Hunhandle *pHunspell, const char * word)
+{
+  // forward to Hunspell::remove() on the object behind the handle
+  Hunspell * checker = (Hunspell*)pHunspell;
+  return checker->remove(word);
+}
diff --git a/src/myspell/hunspell.h b/src/myspell/hunspell.h
index b04b83a..452599c 100644
--- a/src/myspell/hunspell.h
+++ b/src/myspell/hunspell.h
@@ -26,6 +26,60 @@ char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
*/
int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
+ /* morphological functions */
+
+ /* analyze(result, word) - morphological analysis of the word */
+
+int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
+
+ /* stem(result, word) - stemmer function */
+
+int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
+
+ /* stem(result, analysis, n) - get stems from a morph. analysis
+ * example:
+ * char ** result, ** result2;
+ * int n1 = Hunspell_analyze(handle, &result, "words");
+ * int n2 = Hunspell_stem2(handle, &result2, result, n1);
+ */
+
+int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
+
+ /* generate(result, word, word2) - morphological generation by example(s) */
+
+int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
+ const char * word2);
+
+ /* generate(result, word, desc, n) - generation by morph. description(s)
+ * example:
+ * char ** result;
+ * char * affix = "is:plural"; // the description depends on the dictionary, too
+ * int n = Hunspell_generate2(handle, &result, "word", &affix, 1);
+ * for (int i = 0; i < n; i++) printf("%s\n", result[i]);
+ */
+
+int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
+ char** desc, int n);
+
+ /* functions for run-time modification of the dictionary */
+
+ /* add word to the run-time dictionary
+ * (takes the handle first, like every other function in this C API,
+ * and matching the definition in hunspell.cxx)
+ */
+
+int Hunspell_add(Hunhandle *pHunspell, const char * word);
+
+ /* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
+ * (takes the handle first, matching the definition in hunspell.cxx)
+ */
+
+int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
+    const char * example);
+
+ /* remove word from the run-time dictionary */
+ /* NOTE: not implemented yet */
+ /* (takes the handle first, matching the definition in hunspell.cxx) */
+
+int Hunspell_remove(Hunhandle *pHunspell, const char * word);
+
+
#ifdef __cplusplus
}
#endif
diff --git a/src/myspell/hunspell.hxx b/src/myspell/hunspell.hxx
index 6d3f092..38c141e 100644
--- a/src/myspell/hunspell.hxx
+++ b/src/myspell/hunspell.hxx
@@ -73,17 +73,59 @@ public:
*/
int suggest(char*** slst, const char * word);
+
char * get_dic_encoding();
- /* handling custom dictionary */
+ /* morphological functions */
+
+ /* analyze(result, word) - morphological analysis of the word */
+
+ int analyze(char*** slst, const char * word);
+
+ /* stem(result, word) - stemmer function */
+
+ int stem(char*** slst, const char * word);
+
+ /* stem(result, analysis, n) - get stems from a morph. analysis
+ * example:
+ * char ** result, ** result2;
+ * int n1 = analyze(&result, "words");
+ * int n2 = stem(&result2, result, n1);
+ */
+
+ int stem(char*** slst, char ** morph, int n);
+
+ /* generate(result, word, word2) - morphological generation by example(s) */
+
+ int generate(char*** slst, const char * word, const char * word2);
+
+ /* generate(result, word, desc, n) - generation by morph. description(s)
+ * example:
+ * char ** result;
+ * char * affix = "is:plural"; // the description depends on the dictionary, too
+ * int n = generate(&result, "word", &affix, 1);
+ * for (int i = 0; i < n; i++) printf("%s\n", result[i]);
+ */
+
+ int generate(char*** slst, const char * word, char ** desc, int n);
+
+ /* functions for run-time modification of the dictionary */
+
+ /* add word to the run-time dictionary */
- int put_word(const char * word);
+ int add(const char * word);
- /* pattern is a sample dictionary word
- * put word into custom dictionary with affix flags of pattern word
+ /* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
*/
- int put_word_pattern(const char * word, const char * pattern);
+ int add_with_affix(const char * word, const char * example);
+
+ /* remove word from the run-time dictionary */
+ /* NOTE: not implemented yet */
+
+ int remove(const char * word);
/* other */
@@ -93,25 +135,14 @@ public:
struct cs_info * get_csconv();
const char * get_version();
-
- /* experimental functions */
+
+ /* experimental and deprecated functions */
#ifdef HUNSPELL_EXPERIMENTAL
- /* suffix is an affix flag string, similarly in dictionary files */
-
+ /* suffix is an affix flag string, similarly in dictionary files */
int put_word_suffix(const char * word, const char * suffix);
-
- /* morphological analysis */
-
- char * morph(const char * word);
- int analyze(char*** out, const char *word);
-
char * morph_with_correction(const char * word);
- /* stemmer function */
-
- int stem(char*** slst, const char * word);
-
/* spec. suggestions */
int suggest_auto(char*** slst, const char * word);
int suggest_pos_stems(char*** slst, const char * word);
@@ -133,6 +164,8 @@ private:
hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root);
int is_keepcase(const hentry * rv);
int insert_sug(char ***slst, char * word, int ns);
+ char * cat_result(char * result, char * st);
+ char * stem_description(const char * desc);
};
diff --git a/src/myspell/license.hunspell b/src/myspell/license.hunspell
index 92c531c..81ffad8 100644
--- a/src/myspell/license.hunspell
+++ b/src/myspell/license.hunspell
@@ -14,7 +14,7 @@
* The Original Code is Hunspell, based on MySpell.
*
* The Initial Developers of the Original Code are
- * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Kevin Hendricks (MySpell) and Laszlo Nemeth (Hunspell).
* Portions created by the Initial Developers are Copyright (C) 2002-2005
* the Initial Developers. All Rights Reserved.
*
@@ -24,22 +24,22 @@
* Giuseppe Modugno
* Gianluca Turconi
* Simon Brouwer
- * Noll János
- * Bíró Árpád
- * Goldman Eleonóra
- * Sarlós Tamás
- * Bencsáth Boldizsár
- * Halácsy Péter
- * Dvornik László
- * Gefferth András
+ * Noll Janos
+ * Biro Arpad
+ * Goldman Eleonora
+ * Sarlos Tamas
+ * Bencsath Boldizsar
+ * Halacsy Peter
+ * Dvornik Laszlo
+ * Gefferth Andras
* Nagy Viktor
- * Varga Dániel
+ * Varga Daniel
* Chris Halls
* Rene Engelhard
* Bram Moolenaar
* Dafydd Jones
- * Harri Pitkänen
- * András Tímár
+ * Harri Pitkanen
+ * Andras Timar
* Tor Lillqvist
*
* Alternatively, the contents of this file may be used under the terms of
@@ -58,4 +58,4 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
-#endif \ No newline at end of file
+#endif
diff --git a/src/myspell/makefile.mk b/src/myspell/makefile.mk
new file mode 100644
index 0000000..8eeaebe
--- /dev/null
+++ b/src/myspell/makefile.mk
@@ -0,0 +1,113 @@
+#*************************************************************************
+#
+# $RCSfile: makefile.mk,v $
+#
+# $Revision: 1.7 $
+#
+# last change: $Author: vg $ $Date: 2003/06/12 10:38:24 $
+#
+# The Contents of this file are made available subject to the terms of
+# either of the following licenses
+#
+# - GNU Lesser General Public License Version 2.1
+# - Sun Industry Standards Source License Version 1.1
+#
+# Sun Microsystems Inc., October, 2000
+#
+# GNU Lesser General Public License Version 2.1
+# =============================================
+# Copyright 2000 by Sun Microsystems, Inc.
+# 901 San Antonio Road, Palo Alto, CA 94303, USA
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1, as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+#
+# Sun Industry Standards Source License Version 1.1
+# =================================================
+# The contents of this file are subject to the Sun Industry Standards
+# Source License Version 1.1 (the "License"); You may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at http://www.openoffice.org/license.html.
+#
+# Software provided under this License is provided on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
+# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
+# See the License for the specific provisions governing your rights and
+# obligations concerning the Software.
+#
+# The Initial Developer of the Original Code is: Sun Microsystems, Inc.
+#
+# Copyright: 2000 by Sun Microsystems, Inc.
+#
+# All Rights Reserved.
+#
+# Contributor(s): _______________________________________
+#
+#
+#
+#*************************************************************************
+
+PRJ = ..
+
+PRJNAME = hunspell
+TARGET = hunspell
+LIBTARGET=NO
+
+#----- Settings ---------------------------------------------------------
+
+.INCLUDE : settings.mk
+
+# --- Files --------------------------------------------------------
+
+# all_target: ALLTAR DICTIONARY
+all_target: ALLTAR
+
+##CXXFLAGS += -I..$/..$/lingutil
+##CFLAGSCXX += -I..$/..$/lingutil
+##CFLAGSCC += -I..$/..$/lingutil
+
+CDEFS+=-DOPENOFFICEORG
+
+SLOFILES= \
+ $(SLO)$/affentry.obj \
+ $(SLO)$/affixmgr.obj \
+ $(SLO)$/dictmgr.obj \
+ $(SLO)$/csutil.obj \
+ $(SLO)$/utf_info.obj \
+ $(SLO)$/hashmgr.obj \
+ $(SLO)$/suggestmgr.obj \
+ $(SLO)$/hunspell.obj
+
+LIB1TARGET= $(SLB)$/lib$(TARGET).lib
+LIB1ARCHIV= $(LB)/lib$(TARGET).a
+LIB1OBJFILES= $(SLOFILES)
+
+# DIC2BIN= \
+# en_US.aff \
+# en_US.dic
+#
+# de_DE.aff \
+# de_DE.dic
+
+
+# DICTIONARY :
+# +$(COPY) $(foreach,i,$(DIC2BIN) $i) $(BIN)
+
+
+# --- Targets ------------------------------------------------------
+
+.INCLUDE : target.mk
+
diff --git a/src/myspell/suggestmgr.cxx b/src/myspell/suggestmgr.cxx
index 964004c..b1a58f3 100644
--- a/src/myspell/suggestmgr.cxx
+++ b/src/myspell/suggestmgr.cxx
@@ -14,6 +14,7 @@
#endif
#include "suggestmgr.hxx"
+#include "htypes.hxx"
#ifndef MOZILLA_CLIENT
#ifndef W32
@@ -278,7 +279,7 @@ int SuggestMgr::suggest_auto(char*** slst, const char * w, int nsug)
if ((nsug < maxSug) && (nsug > -1))
nsug = mapchars(wlst, word, nsug, cpdsuggest);
- if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1; else *
+ if ((cpdsuggest==0) && (nsug>0)) nocompoundtwowords=1;
// perhaps we forgot to hit space and two words ran together
@@ -1055,6 +1056,7 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
scphon = scoresphon[MAX_ROOTS-1];
char w2[MAXWORDUTF8LEN];
+ char f[MAXSWUTF8L];
char * word = w;
// word reversing wrapper for complex prefixes
@@ -1085,7 +1087,6 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
strcpy(candidate, word);
mkallcap(candidate, csconv);
phonet(candidate, target, n, *ph);
-// fprintf(stderr, "Tip: %s->%s\n", candidate, target);
}
while ((hp = pHMgr->walk_hashtable(col, hp))) {
@@ -1095,19 +1096,19 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
TESTAFF(hp->astr, pAMgr->get_nosuggest(), hp->alen) ||
TESTAFF(hp->astr, pAMgr->get_onlyincompound(), hp->alen))) continue;
- sc = ngram(3, word, &(hp->word), NGRAM_LONGER_WORSE + NGRAM_LOWERING) +
- leftcommonsubstring(word, &(hp->word));
+ sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + NGRAM_LOWERING) +
+ leftcommonsubstring(word, HENTRY_WORD(hp));
// check special pronounciation
- if (hp->var) {
- int sc2 = ngram(3, word, &(hp->word) + hp->blen + 1, NGRAM_LONGER_WORSE + NGRAM_LOWERING) +
- leftcommonsubstring(word, &(hp->word) + hp->blen + 1);
+ if ((hp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
+ int sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + NGRAM_LOWERING) +
+ leftcommonsubstring(word, f);
if (sc2 > sc) sc = sc2;
}
if (ph && (sc > 2) && (abs(n - (int) hp->clen) <= 3)) {
char target2[MAXSWUTF8L];
- strcpy(candidate, &(hp->word));
+ strcpy(candidate, HENTRY_WORD(hp));
mkallcap(candidate, csconv);
phonet(candidate, target2, -1, *ph);
scphon = 2 * ngram(3, target, target2, NGRAM_LONGER_WORSE);
@@ -1126,7 +1127,7 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
if (scphon > scoresphon[lpphon]) {
scoresphon[lpphon] = scphon;
- rootsphon[lpphon] = &(hp->word);
+ rootsphon[lpphon] = HENTRY_WORD(hp);
lval = scphon;
for (j=0; j < MAX_ROOTS; j++)
if (scoresphon[j] < lval) {
@@ -1178,9 +1179,9 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr)
for (i = 0; i < MAX_ROOTS; i++) {
if (roots[i]) {
struct hentry * rp = roots[i];
- int nw = pAMgr->expand_rootword(glst, MAX_WORDS, &(rp->word), rp->blen,
+ int nw = pAMgr->expand_rootword(glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen,
rp->astr, rp->alen, word, nc,
- ((rp->var) ? &(rp->word) + rp->blen + 1 : NULL));
+ ((rp->var & H_OPT_PHON) ? copy_field(f, HENTRY_DATA(rp), MORPH_PHON) : NULL));
for (int k = 0; k < nw ; k++) {
sc = ngram(n, word, glst[k].word, NGRAM_ANY_MISMATCH + NGRAM_LOWERING) +
@@ -1383,7 +1384,7 @@ int SuggestMgr::checkword(const char * word, int len, int cpdsuggest, int * time
if ((rv->astr) && (TESTAFF(rv->astr,pAMgr->get_forbiddenword(),rv->alen)
|| TESTAFF(rv->astr,pAMgr->get_nosuggest(),rv->alen))) return 0;
while (rv) {
- if (rv->astr && (TESTAFF(rv->astr,pAMgr->get_pseudoroot(),rv->alen) ||
+ if (rv->astr && (TESTAFF(rv->astr,pAMgr->get_needaffix(),rv->alen) ||
TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) {
rv = rv->next_homonym;
@@ -1423,7 +1424,7 @@ int SuggestMgr::check_forbidden(const char * word, int len)
if (pAMgr) {
rv = pAMgr->lookup(word);
- if (rv && rv->astr && (TESTAFF(rv->astr,pAMgr->get_pseudoroot(),rv->alen) ||
+ if (rv && rv->astr && (TESTAFF(rv->astr,pAMgr->get_needaffix(),rv->alen) ||
TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) rv = NULL;
if (!(pAMgr->prefix_check(word,len,1)))
rv = pAMgr->suffix_check(word,len, 0, NULL, NULL, 0, NULL); // prefix+suffix, suffix
@@ -1434,184 +1435,6 @@ int SuggestMgr::check_forbidden(const char * word, int len)
}
#ifdef HUNSPELL_EXPERIMENTAL
-// suggest stems, XXX experimental code
-int SuggestMgr::suggest_stems(char*** slst, const char * w, int nsug)
-{
- char buf[MAXSWUTF8L];
- char ** wlst;
- int prevnsug = nsug;
-
- char w2[MAXWORDUTF8LEN];
- const char * word = w;
-
- // word reversing wrapper for complex prefixes
- if (complexprefixes) {
- strcpy(w2, w);
- if (utf8) reverseword_utf(w2); else reverseword(w2);
- word = w2;
- }
-
- if (*slst) {
- wlst = *slst;
- } else {
- wlst = (char **) calloc(maxSug, sizeof(char *));
- if (wlst == NULL) return -1;
- }
- // perhaps there are a fix stem in the dictionary
- if ((nsug < maxSug) && (nsug > -1)) {
-
- nsug = fixstems(wlst, word, nsug);
- if (nsug == prevnsug) {
- char * s = mystrdup(word);
- char * p = s + strlen(s);
- while ((*p != '-') && (p != s)) p--;
- if (*p == '-') {
- *p = '\0';
- nsug = fixstems(wlst, s, nsug);
- if ((nsug == prevnsug) && (nsug < maxSug) && (nsug >= 0)) {
- char * t;
- buf[0] = '\0';
- for (t = s; (t[0] != '\0') && ((t[0] >= '0') || (t[0] <= '9')); t++); // is a number?
- if (*t != '\0') strcpy(buf, "# ");
- strcat(buf, s);
- wlst[nsug] = mystrdup(buf);
- if (wlst[nsug] == NULL) return -1;
- nsug++;
- }
- p++;
- nsug = fixstems(wlst, p, nsug);
- }
-
- free(s);
- }
- }
-
- if (nsug < 0) {
- for (int i=0;i<maxSug; i++)
- if (wlst[i] != NULL) free(wlst[i]);
- free(wlst);
- return -1;
- }
-
- *slst = wlst;
- return nsug;
-}
-
-
-// there are fix stems in dictionary
-int SuggestMgr::fixstems(char ** wlst, const char * word, int ns)
-{
- char buf[MAXSWUTF8L];
- char prefix[MAXSWUTF8L] = "";
-
- int dicstem = 1; // 0 = lookup, 1= affix, 2 = compound
- int cpdindex = 0;
- struct hentry * rv = NULL;
-
- int wl = strlen(word);
- int cmpdstemnum;
- int cmpdstem[MAXCOMPOUND];
-
- if (pAMgr) {
- rv = pAMgr->lookup(word);
- if (rv) {
- dicstem = 0;
- } else {
- // try stripping off affixes
- rv = pAMgr->affix_check(word, wl);
-
- // else try check compound word
- if (!rv && pAMgr->get_compound()) {
- rv = pAMgr->compound_check(word, wl,
- 0, 0, 100, 0, NULL, 0, &cmpdstemnum, cmpdstem,1);
-
- if (rv) {
- dicstem = 2;
- for (int j = 0; j < cmpdstemnum; j++) {
- cpdindex += cmpdstem[j];
- }
- if(! (pAMgr->lookup(word + cpdindex)))
- pAMgr->affix_check(word + cpdindex, wl - cpdindex); // for prefix
- }
- }
-
-
- if (pAMgr->get_prefix()) {
- strcpy(prefix, pAMgr->get_prefix());
- }
-
- // XXX obsolete, will be a general solution for stemming
- if ((prefix) && (strncmp(prefix, "leg", 3)==0)) prefix[0] = '\0'; // (HU)
- }
-
- }
-
-
-
- if ((rv) && (ns < maxSug)) {
-
- // check fixstem flag and not_valid_stem flag
- // first word
- if ((ns < maxSug) && (dicstem < 2)) {
- strcpy(buf, prefix);
- if ((dicstem > 0) && pAMgr->get_derived()) {
- // XXX obsolote
- if (strlen(prefix) == 1) {
- strcat(buf, (pAMgr->get_derived()) + 1);
- } else {
- strcat(buf, pAMgr->get_derived());
- }
- } else {
- // special stem in affix description
- const char * wordchars = pAMgr->get_wordchars();
- if (rv->description &&
- (strchr(wordchars, *(rv->description)))) {
- char * desc = (rv->description) + 1;
- while (strchr(wordchars, *desc)) desc++;
- strncat(buf, rv->description, desc - (rv->description));
- } else {
- strcat(buf, rv->word);
- }
- }
- wlst[ns] = mystrdup(buf);
- if (wlst[ns] == NULL) return -1;
- ns++;
- }
-
- if (dicstem == 2) {
-
- // compound stem
-
-// if (rv->astr && (strchr(rv->astr, '0') == NULL)) {
- if (rv->astr) {
- strcpy(buf, word);
- buf[cpdindex] = '\0';
- if (prefix) strcat(buf, prefix);
- if (pAMgr->get_derived()) {
- strcat(buf, pAMgr->get_derived());
- } else {
- // special stem in affix description
- const char * wordchars = pAMgr->get_wordchars();
- if (rv->description &&
- (strchr(wordchars, *(rv->description)))) {
- char * desc = (rv->description) + 1;
- while (strchr(wordchars, *desc)) desc++;
- strncat(buf, rv->description, desc - (rv->description));
- } else {
- strcat(buf, rv->word);
- }
- }
- if (ns < maxSug) {
- wlst[ns] = mystrdup(buf);
- if (wlst[ns] == NULL) return -1;
- ns++;
- }
- }
- }
- }
- return ns;
-}
-
// suggest possible stems
int SuggestMgr::suggest_pos_stems(char*** slst, const char * w, int nsug)
{
@@ -1651,6 +1474,7 @@ int SuggestMgr::suggest_pos_stems(char*** slst, const char * w, int nsug)
*slst = wlst;
return nsug;
}
+#endif // END OF HUNSPELL_EXPERIMENTAL CODE
char * SuggestMgr::suggest_morph(const char * w)
@@ -1679,13 +1503,18 @@ char * SuggestMgr::suggest_morph(const char * w)
while (rv) {
if ((!rv->astr) || !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
- TESTAFF(rv->astr, pAMgr->get_pseudoroot(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
TESTAFF(rv->astr,pAMgr->get_onlyincompound(),rv->alen))) {
- if (rv->description && ((!rv->astr) ||
- !TESTAFF(rv->astr, pAMgr->get_lemma_present(), rv->alen)))
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ strcat(result, " ");
+ strcat(result, MORPH_STEM);
strcat(result, word);
- if (rv->description) strcat(result, rv->description);
- strcat(result, "\n");
+ }
+ if (HENTRY_DATA(rv)) {
+ strcat(result, " ");
+ strcat(result, HENTRY_DATA(rv));
+ }
+ strcat(result, "\n");
}
rv = rv->next_homonym;
}
@@ -1700,9 +1529,10 @@ char * SuggestMgr::suggest_morph(const char * w)
pAMgr->compound_check_morph(word, strlen(word),
0, 0, 100, 0,NULL, 0, &r, NULL);
- return (*result) ? mystrdup(line_uniq(delete_zeros(result))) : NULL;
+ return (*result) ? mystrdup(line_uniq(result, MSEP_REC)) : NULL;
}
+#ifdef HUNSPELL_EXPERIMENTAL
char * SuggestMgr::suggest_morph_for_spelling_error(const char * word)
{
char * p = NULL;
@@ -1710,7 +1540,7 @@ char * SuggestMgr::suggest_morph_for_spelling_error(const char * word)
if (!**wlst) return NULL;
// we will use only the first suggestion
for (int i = 0; i < maxSug - 1; i++) wlst[i] = "";
- int ns = suggest(&wlst, word, maxSug - 1);
+ int ns = suggest(&wlst, word, maxSug - 1, NULL);
if (ns == maxSug) {
p = suggest_morph(wlst[maxSug - 1]);
free(wlst[maxSug - 1]);
@@ -1720,6 +1550,153 @@ char * SuggestMgr::suggest_morph_for_spelling_error(const char * word)
}
#endif // END OF HUNSPELL_EXPERIMENTAL CODE
+/* affixation */
+
+/*
+ * Append rec followed by '\n' to the NUL-terminated string held in buf,
+ * whose total capacity is cap bytes.  The record is appended only when it
+ * fits completely (record, newline and terminating NUL); otherwise buf is
+ * left untouched.  Returns 1 if the record was appended, 0 if it was dropped.
+ */
+static int append_gen_record(char * buf, size_t cap, const char * rec)
+{
+    size_t used = strlen(buf);
+    size_t len = strlen(rec);
+    // room is needed for rec itself, the '\n' and the trailing NUL
+    if (used >= cap || len + 2 > cap - used) return 0;
+    strcpy(buf + used, rec);
+    buf[used + len] = '\n';
+    buf[used + len + 1] = '\0';
+    return 1;
+}
+
+/*
+ * Generate forms of the dictionary entry rv that match pattern.
+ *
+ * The entry itself is passed to pAMgr->morphgen() together with its
+ * morphological data.  Then every MORPH_ALLOMORPH field found in that data
+ * is looked up in the dictionary, and each homonym whose MORPH_STEM field
+ * matches the word of rv is passed to morphgen() as well.
+ *
+ * Every non-NULL morphgen() result becomes one '\n'-terminated record of the
+ * returned string.  Records that would not fit into the MAXLNLEN work buffer
+ * are dropped instead of being written past its end.
+ *
+ * Returns a copy made with mystrdup() (suggest_gen releases it with free()),
+ * or NULL when rv has no morphological data or nothing was generated.
+ */
+char * SuggestMgr::suggest_hentry_gen(hentry * rv, char * pattern)
+{
+    char result[MAXLNLEN];
+    *result = '\0';
+
+    // Without morphological data there is neither a direct form to generate
+    // nor any allomorph field to follow.
+    if (!(HENTRY_DATA(rv))) return NULL;
+
+//  disabled filter:
+//  if (get_sfxcount(HENTRY_DATA(rv)) > get_sfxcount(pattern)) return NULL;
+
+    char * aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen,
+                                 HENTRY_DATA(rv), pattern, 0);
+    if (aff) {
+        append_gen_record(result, sizeof(result), aff);
+        free(aff);
+    }
+
+    // check all allomorphs
+    char allomorph[MAXLNLEN];
+    char * p = strstr(HENTRY_DATA(rv), MORPH_ALLOMORPH);
+    while (p) {
+        p += MORPH_TAG_LEN;
+        int plen = fieldlen(p);
+        // An allomorph field that does not fit the buffer is skipped rather
+        // than copied past the end of allomorph[].
+        if (plen >= 0 && plen < static_cast<int>(sizeof(allomorph))) {
+            strncpy(allomorph, p, plen);
+            allomorph[plen] = '\0';
+            for (struct hentry * rv2 = pAMgr->lookup(allomorph); rv2;
+                 rv2 = rv2->next_homonym) {
+//              disabled filter:
+//              get_sfxcount(HENTRY_DATA(rv2)) <= get_sfxcount(pattern)
+                if (!(HENTRY_DATA(rv2))) continue;
+                char * st = strstr(HENTRY_DATA(rv2), MORPH_STEM);
+                if (!st) continue;
+                // the homonym must name the word of rv as its stem
+                if (strncmp(st + MORPH_TAG_LEN, HENTRY_WORD(rv),
+                            fieldlen(st + MORPH_TAG_LEN)) != 0) continue;
+                char * aff2 = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen,
+                                              rv2->astr, rv2->alen,
+                                              HENTRY_DATA(rv2), pattern, 0);
+                if (aff2) {
+                    append_gen_record(result, sizeof(result), aff2);
+                    free(aff2);
+                }
+            }
+        }
+        // continue the search after the field just handled
+        p = strstr(p + plen, MORPH_ALLOMORPH);
+    }
+
+    return (*result) ? mystrdup(result) : NULL;
+}
+/*
+ * Generate word forms for a list of morphological descriptions.
+ *
+ * desc    - array of n description strings.  A description may contain
+ *           several MORPH_PART fields, alternatives separated by " | ",
+ *           and a MORPH_STEM field.
+ * n       - number of entries in desc; 0 makes the function return at once.
+ * pattern - morphological pattern handed on to suggest_hentry_gen().
+ *
+ * For every description the stem is looked up in the dictionary, and for
+ * each homonym suggest_hentry_gen() is called, first with the alternative
+ * text followed by pattern, then with pattern alone if that gave nothing.
+ * Each generated form is appended to the output preceded by MSEP_REC and by
+ * the fields of all compound parts but the last one.
+ *
+ * If a whole pass generates nothing and pattern contains MORPH_DERI_SFX, the
+ * pass is repeated with every MORPH_DERI_SFX tag of a private copy of
+ * pattern overwritten by MORPH_TERM_SFX.
+ *
+ * Returns a mystrdup() copy of the collected forms, or NULL if there are
+ * none or if no affix manager (pAMgr) is set.
+ *
+ * NOTE(review): none of the copies into the MAXLNLEN-sized local buffers
+ * below is preceded by a visible length check - confirm that callers bound
+ * the input lengths.
+ */
+char * SuggestMgr::suggest_gen(char ** desc, int n, char * pattern) {
+ char result[MAXLNLEN]; // per-description prefix: fields of the leading compound parts
+ char result2[MAXLNLEN]; // accumulated output, one MSEP_REC-led record per generated form
+ char newpattern[MAXLNLEN]; // private copy of pattern, modified for the retry pass
+ *newpattern = '\0';
+ if (n == 0) return 0;
+ *result2 = '\0';
+ struct hentry * rv = NULL;
+ if (!pAMgr) return NULL;
+
+// search affixed forms with and without derivational suffixes
+ while(1) {
+
+ for (int k = 0; k < n; k++) {
+ *result = '\0';
+ // add compound word parts (except the last one)
+ char * s = (char *) desc[k];
+ char * part = strstr(s, MORPH_PART);
+ if (part) {
+ char * nextpart = strstr(part + 1, MORPH_PART);
+ while (nextpart) {
+ copy_field(result + strlen(result), part, MORPH_PART);
+ part = nextpart;
+ nextpart = strstr(part + 1, MORPH_PART);
+ }
+ s = part; // from here on only the last part of the description is processed
+ }
+
+ char **pl;
+ char tok[MAXLNLEN];
+ strcpy(tok, s); // NOTE(review): no check of strlen(s) against MAXLNLEN is visible here
+ char * alt = strstr(tok, " | ");
+ while (alt) {
+ alt[1] = MSEP_ALT; // replace the '|' of " | " so that line_tok can split on MSEP_ALT
+ alt = strstr(alt, " | ");
+ }
+ int pln = line_tok(tok, &pl, MSEP_ALT);
+ for (int i = 0; i < pln; i++) {
+ // cut the alternative at the first inflectional suffix tag and disable terminal suffix tags
+ char * is = strstr(pl[i], MORPH_INFL_SFX);
+ if (is) *is = '\0';
+ char * ts = strstr(pl[i], MORPH_TERM_SFX);
+ while (ts) {
+ *ts = '_'; // overwriting the first character makes the tag no longer match MORPH_TERM_SFX
+ ts = strstr(pl[i], MORPH_TERM_SFX);
+ }
+ char * st = strstr(s, MORPH_STEM); // the stem is searched in s, not in the alternative pl[i]
+ if (st) {
+ copy_field(tok, st, MORPH_STEM); // tok is reused as scratch space for the stem
+ rv = pAMgr->lookup(tok);
+ while (rv) {
+ char newpat[MAXLNLEN];
+ strcpy(newpat, pl[i]);
+ strcat(newpat, pattern); // NOTE(review): combined length is not checked against MAXLNLEN
+ char * sg = suggest_hentry_gen(rv, newpat);
+ if (!sg) sg = suggest_hentry_gen(rv, pattern); // fall back to the bare pattern
+ if (sg) {
+ char ** gen;
+ int genl = line_tok(sg, &gen, MSEP_REC);
+ free(sg);
+ sg = NULL;
+ for (int j = 0; j < genl; j++) {
+ if (strstr(pl[i], MORPH_SURF_PFX)) {
+ int r2l = strlen(result2);
+ result2[r2l] = MSEP_REC; // overwrites the terminator; the strcpy below terminates the string again
+ strcpy(result2 + r2l + 1, result);
+ copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX);
+ strcat(result2, gen[j]);
+ } else {
+ sprintf(result2 + strlen(result2), "%c%s%s",
+ MSEP_REC, result, gen[j]);
+ }
+ }
+ freelist(&gen, genl);
+ }
+ rv = rv->next_homonym;
+ }
+ }
+ }
+ freelist(&pl, pln);
+ }
+
+ if (*result2 || !strstr(pattern, MORPH_DERI_SFX)) break; // done: something was generated or there is no derivational tag left to relax
+ strcpy(newpattern, pattern);
+ pattern = newpattern; // later passes work on the private, modifiable copy
+ char * ds = strstr(pattern, MORPH_DERI_SFX);
+ while (ds) {
+ strncpy(ds, MORPH_TERM_SFX, MORPH_TAG_LEN); // overwrite the tag in place, MORPH_TAG_LEN characters
+ ds = strstr(pattern, MORPH_DERI_SFX);
+ }
+ }
+ return (*result2 ? mystrdup(result2) : NULL);
+}
+
// generate an n-gram score comparing s1 and s2
int SuggestMgr::ngram(int n, char * s1, const char * s2, int opt)
diff --git a/src/myspell/suggestmgr.hxx b/src/myspell/suggestmgr.hxx
index 28fa1ac..d22884f 100644
--- a/src/myspell/suggestmgr.hxx
+++ b/src/myspell/suggestmgr.hxx
@@ -57,6 +57,7 @@ public:
int suggest_pos_stems(char*** slst, const char * word, int nsug);
char * suggest_morph(const char * word);
+ char * suggest_gen(char ** pl, int pln, char * pattern);
char * suggest_morph_for_spelling_error(const char * word);
private:
@@ -98,6 +99,7 @@ private:
void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n);
void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
int lcslen(const char * s, const char* s2);
+ char * suggest_hentry_gen(hentry * rv, char * pattern);
};