Skip to content

Commit 444d2cc

Browse files
authored
Merge pull request #251 from epage/vars
fix(dict): Correctly connect dict with varcon
2 parents fa7ce95 + 04e55e4 commit 444d2cc

File tree

2 files changed

+87
-10
lines changed

2 files changed

+87
-10
lines changed

crates/typos-dict/verify/src/main.rs

+10 -8
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@ use structopt::StructOpt;
66
fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
77
let mut wtr = csv::Writer::from_writer(file);
88

9-
let disallowed_typos = disallowed_typos();
10-
let related_words = related_words();
9+
let disallowed_typos = varcon_words();
10+
let word_variants = proper_word_variants();
1111

1212
let mut reader = csv::ReaderBuilder::new()
1313
.has_headers(false)
@@ -19,7 +19,7 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
1919
if disallowed_typos.contains(&unicase::UniCase::new(typo)) {
2020
continue;
2121
}
22-
let correction = related_words
22+
let correction = word_variants
2323
.get(correction)
2424
.and_then(|words| find_best_match(typo, correction, words))
2525
.unwrap_or(correction);
@@ -28,7 +28,9 @@ fn generate<W: std::io::Write>(file: &mut W, dict: &[u8]) {
2828
wtr.flush().unwrap();
2929
}
3030

31-
fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
31+
fn varcon_words() -> HashSet<unicase::UniCase<&'static str>> {
32+
// Even include improper ones because we should be letting varcon handle that rather than our
33+
// dictionary
3234
varcon::VARCON
3335
.iter()
3436
.flat_map(|c| c.entries.iter())
@@ -37,7 +39,7 @@ fn disallowed_typos() -> HashSet<unicase::UniCase<&'static str>> {
3739
.collect()
3840
}
3941

40-
fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
42+
fn proper_word_variants() -> HashMap<&'static str, HashSet<&'static str>> {
4143
let mut words: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
4244
for entry in varcon::VARCON.iter().flat_map(|c| c.entries.iter()) {
4345
let variants: HashSet<_> = entry
@@ -57,11 +59,11 @@ fn related_words() -> HashMap<&'static str, HashSet<&'static str>> {
5759
fn find_best_match<'c>(
5860
typo: &'c str,
5961
correction: &'c str,
60-
related_words: &HashSet<&'static str>,
62+
word_variants: &HashSet<&'static str>,
6163
) -> Option<&'c str> {
62-
assert!(!related_words.contains(correction));
64+
assert!(!word_variants.contains(correction));
6365
let current = edit_distance::edit_distance(typo, correction);
64-
let mut matches: Vec<_> = related_words
66+
let mut matches: Vec<_> = word_variants
6567
.iter()
6668
.map(|r| (edit_distance::edit_distance(typo, r), *r))
6769
.filter(|(d, _)| *d < current)

src/dict.rs

+77 -2
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,14 @@ impl BuiltIn {
3535

3636
let word = word_token.token();
3737
let mut corrections = if let Some(correction) = self.correct_with_dict(word) {
38-
self.correct_with_vars(word)
39-
.unwrap_or_else(|| Status::Corrections(vec![Cow::Borrowed(correction)]))
38+
match self.correct_with_vars(correction) {
39+
Some(Status::Valid) => Status::Corrections(vec![Cow::Borrowed(correction)]),
40+
Some(correction @ Status::Corrections(_)) => correction,
41+
Some(Status::Invalid) => {
42+
unreachable!("correct_with_vars should always have valid suggestions")
43+
}
44+
None => Status::Corrections(vec![Cow::Borrowed(correction)]),
45+
}
4046
} else {
4147
self.correct_with_vars(word)?
4248
};
@@ -244,6 +250,75 @@ impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> {
244250
mod test {
245251
use super::*;
246252

253+
#[cfg(feature = "dict")]
254+
#[test]
255+
fn test_dict_correct() {
256+
let dict = BuiltIn::new(crate::config::Locale::default());
257+
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
258+
"finallizes",
259+
typos::tokens::Case::Lower,
260+
0,
261+
));
262+
assert_eq!(
263+
correction,
264+
Some(Status::Corrections(vec!["finalizes".into()]))
265+
);
266+
}
267+
268+
#[cfg(feature = "vars")]
269+
#[test]
270+
fn test_varcon_no_locale() {
271+
let dict = BuiltIn::new(crate::config::Locale::En);
272+
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
273+
"finalizes",
274+
typos::tokens::Case::Lower,
275+
0,
276+
));
277+
assert_eq!(correction, Some(Status::Valid));
278+
}
279+
280+
#[cfg(feature = "vars")]
281+
#[test]
282+
fn test_varcon_same_locale() {
283+
let dict = BuiltIn::new(crate::config::Locale::EnUs);
284+
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
285+
"finalizes",
286+
typos::tokens::Case::Lower,
287+
0,
288+
));
289+
assert_eq!(correction, Some(Status::Valid));
290+
}
291+
292+
#[cfg(feature = "vars")]
293+
#[test]
294+
fn test_varcon_different_locale() {
295+
let dict = BuiltIn::new(crate::config::Locale::EnGb);
296+
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
297+
"finalizes",
298+
typos::tokens::Case::Lower,
299+
0,
300+
));
301+
assert_eq!(
302+
correction,
303+
Some(Status::Corrections(vec!["finalises".into()]))
304+
);
305+
}
306+
307+
#[cfg(all(feature = "dict", feature = "vars"))]
308+
#[test]
309+
fn test_dict_to_varcon() {
310+
let dict = BuiltIn::new(crate::config::Locale::EnGb);
311+
let correction = dict.correct_word(typos::tokens::Word::new_unchecked(
312+
"finallizes",
313+
typos::tokens::Case::Lower,
314+
0,
315+
));
316+
assert_eq!(
317+
correction,
318+
Some(Status::Corrections(vec!["finalises".into()]))
319+
);
320+
}
321+
247322
#[test]
248323
fn test_case_correct() {
249324
let cases = [

0 commit comments

Comments
 (0)