summaryrefslogtreecommitdiff
path: root/src/ispell/ispell_checker.h
blob: af4acda6bc657c6fec8d1a5e80edfba7643e33f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
#ifndef ISPELL_CHECKER_H
#define ISPELL_CHECKER_H

#include <glib.h> // give glib a chance to override MAXPATHLEN first before it is set in ispell.h
#include "ispell.h"
#include "enchant.h"

class ISpellChecker
{
public:
	ISpellChecker(EnchantBroker * broker);
	~ISpellChecker();

	bool checkWord(const char * const word, size_t len);
	char ** suggestWord(const char * const word, size_t len, size_t * out_n_suggs);

	bool requestDictionary (const char * szLang);

private:
	EnchantBroker* m_broker;

	ISpellChecker();
	ISpellChecker(const ISpellChecker&);	// no impl
	void operator=(const ISpellChecker&);	// no impl

	char * loadDictionary (const char * szLang );
	bool   loadDictionaryForLanguage ( const char * szLang );
	void   setDictionaryEncoding ( const char * enc );

	//
	// The member functions after this point were formerly global functions
	//  passed a context structure pointer...
	//

	void try_autodetect_charset(const char * inEncoding);

	//
	// From ispell correct.c
	//

	int		casecmp P ((char * a, char * b, int canonical));
	void		makepossibilities P ((ichar_t * word));
	int	insert P ((ichar_t * word));
#ifndef NO_CAPITALIZATION_SUPPORT
	void	wrongcapital P ((ichar_t * word));
#endif /* NO_CAPITALIZATION_SUPPORT */
	void	wrongletter P ((ichar_t * word));
	void	extraletter P ((ichar_t * word));
	void	missingletter P ((ichar_t * word));
	void	missingspace P ((ichar_t * word));
	int		compoundgood P ((ichar_t * word, int pfxopts));
	void	transposedletter P ((ichar_t * word));
	int	ins_cap P ((ichar_t * word, ichar_t * pattern));
	int	save_cap P ((ichar_t * word, ichar_t * pattern,
			  ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]));
	int		ins_root_cap P ((ichar_t * word, ichar_t * pattern,
			  int prestrip, int preadd, int sufstrip, int sufadd,
			  struct dent * firstdent, struct flagent * pfxent,
			  struct flagent * sufent));
	void	save_root_cap P ((ichar_t * word, ichar_t * pattern,
			  int prestrip, int preadd, int sufstrip, int sufadd,
			  struct dent * firstdent, struct flagent * pfxent,
			  struct flagent * sufent,
			  ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN],
			  int * nsaved));

	//
	// From ispell good.c
	//

	int good (ichar_t* w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts);
	void chk_aff (ichar_t* word, ichar_t* ucword, int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts);
	int linit(char*);
	struct dent * ispell_lookup (ichar_t* s, int dotree);
	int strtoichar (ichar_t* out, char* in, int outlen, int canonical);
	int ichartostr (char* out, ichar_t* in, int outlen, int canonical);
	char * ichartosstr (ichar_t* in, int canonical);
	int	findfiletype (const char * name, int searchnames, int * deformatter);
	long whatcap (ichar_t* word);

	/*
		HACK: macros replaced with function implementations 
		so we could do a side-effect-free check for unicode
		characters which aren't in hashheader
	*/
	char myupper(ichar_t c);
	char mylower(ichar_t c);
	int myspace(ichar_t c);
	char iswordch(ichar_t c);
	char isboundarych(ichar_t c);
	char isstringstart(ichar_t c);
	ichar_t mytolower(ichar_t c);
	ichar_t mytoupper(ichar_t c);

#ifndef ICHAR_IS_CHAR
	int cap_ok (ichar_t* word, struct success* hit, int len);

	int hash (ichar_t* s, int hashtblsize);
#endif

	//
	// From ispell lookup.c
	//

	void	clearindex P ((struct flagptr * indexp));
	void     initckch P ((char *));

	void alloc_ispell_struct();
	void free_ispell_struct();

	//
	// From ispell makedent.c
	//

	int		addvheader P ((struct dent * ent));
	void		upcase P ((ichar_t * string));
	void		lowcase P ((ichar_t * string));
	void		chupcase P ((char * s));

	int		stringcharlen P ((char * bufp, int canonical));
	ichar_t *	strtosichar P ((char * in, int canonical));
	char *		printichar P ((int in));

	//
	// From ispell tgood.c
	//

	void	pfx_list_chk P ((ichar_t * word, ichar_t * ucword,
			  int len, int optflags, int sfxopts, struct flagptr * ind,
			  int ignoreflagbits, int allhits));
	void	chk_suf P ((ichar_t * word, ichar_t * ucword, int len,
			  int optflags, struct flagent * pfxent, int ignoreflagbits,
			  int allhits));
	void	suf_list_chk P ((ichar_t * word, ichar_t * ucword, int len,
			  struct flagptr * ind, int optflags, struct flagent * pfxent,
			  int ignoreflagbits, int allhits));
	int		expand_pre P ((char * croot, ichar_t * rootword,
			  MASKTYPE mask[], int option, char * extra));
	int	pr_pre_expansion P ((char * croot, ichar_t * rootword,
			  struct flagent * flent, MASKTYPE mask[], int option,
			  char * extra));
	int		expand_suf P ((char * croot, ichar_t * rootword,
			  MASKTYPE mask[], int optflags, int option, char * extra));
	int	pr_suf_expansion P ((char * croot, ichar_t * rootword,
			  struct flagent * flent, int option, char * extra));
	void	forcelc P ((ichar_t * dst, int len));

	/* this is used for converting form unsigned short to UCS-4 */

	int deftflag;              /* NZ for TeX mode by default */
	int prefstringchar;        /* Preferred string character type */
	bool m_bSuccessfulInit;

	//
	// The members after this point were formerly global variables
	//  in the original ispell code
	//

	char *	m_BC;	/* backspace if not ^H */
	char *	m_cd;	/* clear to end of display */
	char *	m_cl;	/* clear display */
	char *	m_cm;	/* cursor movement */
	char *	m_ho;	/* home */
	char *	m_nd;	/* non-destructive space */
	char *	m_so;	/* standout */
	char *	m_se;	/* standout end */
	int	m_sg;	/* space taken by so/se */
	char *	m_ti;	/* terminal initialization sequence */
	char *	m_te;	/* terminal termination sequence */
	int	m_li;	/* lines */
	int	m_co;	/* columns */

	char	m_ctoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Current token as char */
	ichar_t	m_itoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Ctoken as ichar_t str */

	int	m_numhits;	/* number of hits in dictionary lookups */
	struct success
			m_hits[MAX_HITS]; /* table of hits gotten in lookup */

	char *	m_hashstrings;	/* Strings in hash table */
	struct hashheader
			m_hashheader;	/* Header of hash table */
	struct dent *
			m_hashtbl;	/* Main hash table, for dictionary */
	int	m_hashsize;	/* Size of main hash table */

	char	m_hashname[MAXPATHLEN]; /* Name of hash table file */

	int	m_aflag;		/* NZ if -a or -A option specified */
	int	m_cflag;		/* NZ if -c (crunch) option */
	int	m_lflag;		/* NZ if -l (list) option */
	int	m_incfileflag;	/* whether xgets() acts exactly like gets() */
	int	m_nodictflag;	/* NZ if dictionary not needed */

	int	m_uerasechar;	/* User's erase character, from stty */
	int	m_ukillchar;	/* User's kill character */

	unsigned int m_laststringch; /* Number of last string character */
	int	m_defdupchar;	/* Default duplicate string type */

	int	m_numpflags;		/* Number of prefix flags in table */
	int	m_numsflags;		/* Number of suffix flags in table */
	struct flagptr m_pflagindex[SET_SIZE + MAXSTRINGCHARS];
						/* Fast index to pflaglist */
	struct flagent *	m_pflaglist;	/* Prefix flag control list */
	struct flagptr m_sflagindex[SET_SIZE + MAXSTRINGCHARS];
						/* Fast index to sflaglist */
	struct flagent *	m_sflaglist;	/* Suffix flag control list */

	struct strchartype *		/* String character type collection */
			m_chartypes;

	FILE *	m_infile;			/* File being corrected */
	FILE *	m_outfile;		/* Corrected copy of infile */

	char *	m_askfilename;		/* File specified in -f option */

	int	m_changes;		/* NZ if changes made to cur. file */
	int	m_readonly;		/* NZ if current file is readonly */
	int	m_quit;			/* NZ if we're done with this file */

#define MAXPOSSIBLE	100	/* Max no. of possibilities to generate */

	char	m_possibilities[MAXPOSSIBLE][INPUTWORDLEN + MAXAFFIXLEN];
					/* Table of possible corrections */
	int	m_pcount;		/* Count of possibilities generated */
	int	m_maxposslen;	/* Length of longest possibility */
	int	m_easypossibilities; /* Number of "easy" corrections found */
					/* ..(defined as those using legal affixes) */

	/*
	 * The following array contains a list of characters that should be tried
	 * in "missingletter."  Note that lowercase characters are omitted.
	 */
	int	m_Trynum;		/* Size of "Try" array */
	ichar_t	m_Try[SET_SIZE + MAXSTRINGCHARS];

	GIConv  m_translate_in; /* Selected translation from/to Unicode */
	GIConv  m_translate_out;
};

#endif /* ISPELL_CHECKER_H */