blob: 94efada58ee3267c034ffc233ef9d4d65d2ec65a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
/* Stemmer for Esperanto in UTF-8 */
// No string or integer variables are used; the empty lists are kept so that
// every declaration kind is visibly accounted for.
strings ()
integers ()

// Set by canonical_form whenever it rewrites an acute-accented vowel; stem
// tests it and skips all suffix handling when it is set.
booleans ( foreign )

// Internal routines, listed in the order stem reaches them.
routines (
    canonical_form
    apostrophe
    short_word
    interjection
    correlative
    unuj
    standard_suffix
)

// The only routine callable from outside.
externals ( stem )

// Character classes; their members are defined inside the backwardmode
// section.
groupings (
    vowel
    aiou
    ao
    ou
)
// Acute-accented vowels: U+00E1, U+00E9, U+00ED, U+00F3, U+00FA.
stringdef a' hex 'E1'
stringdef e' hex 'E9'
stringdef i' hex 'ED'
stringdef o' hex 'F3'
stringdef u' hex 'FA'

// Letters that canonical_form substitutes for the two-character spellings
// cx, gx, hx, jx, sx and ux: U+0109, U+011D, U+0125, U+0135, U+015D, U+016D.
stringdef cx hex '0109'
stringdef gx hex '011D'
stringdef hx hex '0125'
stringdef jx hex '0135'
stringdef sx hex '015D'
stringdef ux hex '016D'
// Normalise the spelling of the word, scanning forwards from the start.
// At each position the longest matching entry of the among is applied:
//   - an acute-accented vowel is replaced by the plain vowel and the
//     foreign flag is set;
//   - cx, gx, hx, jx, sx, ux are replaced by the corresponding single
//     letter defined by the stringdef of the same name;
//   - otherwise the empty string matches and the cursor moves on by one
//     character.
// The repeat stops when next can no longer advance.
stringescapes { }

define canonical_form as repeat (
    [substring] among (
        // Accented vowels: strip the accent, remember the word is foreign.
        '{a'}' (<- 'a' set foreign)
        '{e'}' (<- 'e' set foreign)
        '{i'}' (<- 'i' set foreign)
        '{o'}' (<- 'o' set foreign)
        '{u'}' (<- 'u' set foreign)

        // Letter + x digraphs: substitute the single letter.
        'cx' (<- '{cx}')
        'gx' (<- '{gx}')
        'hx' (<- '{hx}')
        'jx' (<- '{jx}')
        'sx' (<- '{sx}')
        'ux' (<- '{ux}')

        // No rewrite applies here: step over one character.
        '' (next)
    )
)
backwardmode (
stringescapes { }
// Expand a trailing apostrophe. The alternatives are tried in order and the
// first one that succeeds wins:
//   1. the whole word is un + apostrophe  -> unu
//   2. the whole word is l + apostrophe   -> la
//   3. any other word ending in an apostrophe has the apostrophe replaced
//      by o.
// Fails, leaving the word untouched, when the word has no final apostrophe.
define apostrophe as (
    ( ['un{'}'] atlimit <- 'unu' )
    or
    ( ['l{'}'] atlimit <- 'la' )
    or
    ( ['{'}'] <- 'o' )
)
// Character classes used by the backward routines.
define vowel 'aeiou'
define aiou  'aiou'    // every vowel except e
define ao    'ao'
define ou    'ou'
// Succeeds when the word contains fewer than two vowels, i.e. when it is
// not possible to step backwards past a vowel twice in a row.
define short_word as not (
    loop 2 gopast vowel
)
// Succeeds when the entire word is one of the listed words. The among
// matches at the end of the word and atlimit then requires that nothing
// precedes the match.
define interjection as (
    among (
        'adia{ux}'
        'aha'
        'amen'
        'hola'
        'hura'
        'mia{ux}'
        'muu'
        'oho'
    )
    atlimit
)
// Recognise a word that, as a whole, fits the pattern checked inside test
// below, and delete only the optional trailing letters ('n', or 'j' and/or
// 'n') that follow its final vowel. Fails without changing the word when
// the pattern does not cover the entire word.
define correlative as (
// Begin with an empty slice at the end of the word.
[]
// Ignore -al, -am, etc. since they can't be confused with suffixes.
test (
// Final part, matched backwards from the end of the word:
//   'a'                        (slice stays empty, so nothing is deleted)
//   'e' optionally followed by 'n'
//   'o' or 'u' optionally followed by 'j' and then optionally by 'n'
// Each ']' closes the slice right after the optional letters, so the vowel
// itself is never part of the slice.
('a' or (try 'n'] 'e') or (try 'n' try 'j'] ou))
'i'
// Optional beginning of the word before the 'i': k, t, {cx} or nen.
try ('k' or 't' or '{cx}' or 'nen')
// Nothing may precede what has been matched.
atlimit
)
// test restores the cursor; the slice marked above is what gets removed.
delete
)
// Handles the whole words unuj and unujn: the slice covers the final j
// (plus a following n when present) and is deleted, leaving unu. Fails for
// any other word.
define unuj as (
    [ try 'n' 'j' ]
    'unu' atlimit
    delete
)
// Strip the regular ending of a word. Working backwards from the end, the
// slice takes in, each part being optional:
//   - one ending: a or o, optionally followed by j and then optionally n;
//     or a, i, o or u, optionally followed by s;
//     or e, optionally followed by n;
//   - a hyphen immediately before that;
//   - a{ux} immediately before that.
// Whatever was collected is deleted. Every part is wrapped in try, so the
// routine itself always succeeds.
define standard_suffix as (
    [
        try (
            (try 'n' try 'j' ao)
            or (try 's' aiou)
            or (try 'n' 'e')
        )
        try '-'
        try 'a{ux}'
    ]
    delete
)
)
// Entry point: stem the current word in place.
//
//   1. Clear the foreign flag, then normalise spelling with canonical_form,
//      which sets the flag again if the word contains an acute-accented
//      vowel.
//   2. If the flag is set, stop here and signal failure; the word keeps its
//      normalised spelling but no suffix is removed.
//   3. Otherwise, working backwards from the end of the word: expand a
//      trailing apostrophe, then take the first of these that applies:
//      short word (left alone), interjection (left alone), correlative,
//      unuj, and finally the regular suffix stripper.
define stem as (
    // Nothing else in this file ever unsets foreign, so a value left over
    // from an earlier word stemmed with the same stemmer would make every
    // later word look foreign. Reset it explicitly on each call.
    unset foreign

    do canonical_form

    // Words flagged as foreign are not stemmed any further.
    not foreign

    backwards (
        do apostrophe

        // The first alternative to succeed ends the chain; do makes the
        // last one succeed unconditionally.
        short_word or interjection or
            correlative or unuj or do standard_suffix
    )
)
|