@@ -6,8 +6,8 @@ use structopt::StructOpt;
6
6
fn generate < W : std:: io:: Write > ( file : & mut W , dict : & [ u8 ] ) {
7
7
let mut wtr = csv:: Writer :: from_writer ( file) ;
8
8
9
- let disallowed_typos = disallowed_typos ( ) ;
10
- let related_words = related_words ( ) ;
9
+ let disallowed_typos = varcon_words ( ) ;
10
+ let word_variants = proper_word_variants ( ) ;
11
11
12
12
let mut reader = csv:: ReaderBuilder :: new ( )
13
13
. has_headers ( false )
@@ -19,7 +19,7 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
19
19
if disallowed_typos. contains ( & unicase:: UniCase :: new ( typo) ) {
20
20
continue ;
21
21
}
22
- let correction = related_words
22
+ let correction = word_variants
23
23
. get ( correction)
24
24
. and_then ( |words| find_best_match ( typo, correction, words) )
25
25
. unwrap_or ( correction) ;
@@ -28,7 +28,9 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
28
28
wtr. flush ( ) . unwrap ( ) ;
29
29
}
30
30
31
- fn disallowed_typos ( ) -> HashSet < unicase:: UniCase < & ' static str > > {
31
+ fn varcon_words ( ) -> HashSet < unicase:: UniCase < & ' static str > > {
32
+ // Even include improper ones because we should be letting varcon handle that rather than our
33
+ // dictionary
32
34
varcon:: VARCON
33
35
. iter ( )
34
36
. flat_map ( |c| c. entries . iter ( ) )
@@ -37,7 +39,7 @@ fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
37
39
. collect ( )
38
40
}
39
41
40
- fn related_words ( ) -> HashMap < & ' static str , HashSet < & ' static str > > {
42
+ fn proper_word_variants ( ) -> HashMap < & ' static str , HashSet < & ' static str > > {
41
43
let mut words: HashMap < & ' static str , HashSet < & ' static str > > = HashMap :: new ( ) ;
42
44
for entry in varcon:: VARCON . iter ( ) . flat_map ( |c| c. entries . iter ( ) ) {
43
45
let variants: HashSet < _ > = entry
@@ -57,11 +59,11 @@ fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
57
59
fn find_best_match < ' c > (
58
60
typo : & ' c str ,
59
61
correction : & ' c str ,
60
- related_words : & HashSet < & ' static str > ,
62
+ word_variants : & HashSet < & ' static str > ,
61
63
) -> Option < & ' c str > {
62
- assert ! ( !related_words . contains( correction) ) ;
64
+ assert ! ( !word_variants . contains( correction) ) ;
63
65
let current = edit_distance:: edit_distance ( typo, correction) ;
64
- let mut matches: Vec < _ > = related_words
66
+ let mut matches: Vec < _ > = word_variants
65
67
. iter ( )
66
68
. map ( |r| ( edit_distance:: edit_distance ( typo, r) , * r) )
67
69
. filter ( |( d, _) | * d < current)
0 commit comments