blob: e5106abba2c178021c53492a0acf3d6bc7cec90e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
# frozen-string-literal: true
require_relative "levenshtein"
require_relative "jaro_winkler"
module DidYouMean
class SpellChecker
def initialize(dictionary:)
@dictionary = dictionary
end
def correct(input)
input = normalize(input)
threshold = input.length > 3 ? 0.834 : 0.77
words = @dictionary.select { |word| JaroWinkler.distance(normalize(word), input) >= threshold }
words.reject! { |word| input == word.to_s }
words.sort_by! { |word| JaroWinkler.distance(word.to_s, input) }
words.reverse!
# Correct mistypes
threshold = (input.length * 0.25).ceil
corrections = words.select { |c| Levenshtein.distance(normalize(c), input) <= threshold }
# Correct misspells
if corrections.empty?
corrections = words.select do |word|
word = normalize(word)
length = input.length < word.length ? input.length : word.length
Levenshtein.distance(word, input) < length
end.first(1)
end
corrections
end
private
def normalize(str_or_symbol) #:nodoc:
str = str_or_symbol.to_s.downcase
str.tr!("@", "")
str
end
end
end
|