Skip to content

Commit b99f32d

Browse files
author
Ed Page
committed
perf(dict): Bypass vars when possible
Variant support slows us down by 10-50%. I assume most people will run with `en` and so most of this overhead is wasted. So instead of merging vars with dict, let's instead get a quick win by just skipping vars when we don't need to. If the assumptions behind this change over time or if there is a need for speeding up a specific locale, we can re-address this. Before: ``` check_file/Typos/code time: [35.860 us 36.021 us 36.187 us] thrpt: [8.0117 MiB/s 8.0486 MiB/s 8.0846 MiB/s] check_file/Typos/corpus time: [26.966 ms 27.215 ms 27.521 ms] thrpt: [21.127 MiB/s 21.365 MiB/s 21.562 MiB/s] ``` After: ``` check_file/Typos/code time: [33.837 us 33.928 us 34.031 us] thrpt: [8.5191 MiB/s 8.5452 MiB/s 8.5680 MiB/s] check_file/Typos/corpus time: [17.521 ms 17.620 ms 17.730 ms] thrpt: [32.794 MiB/s 32.999 MiB/s 33.184 MiB/s] ``` This puts us in line with `--no-default-features --features dict` Fixes #253
1 parent d65fa79 commit b99f32d

File tree

3 files changed

+46
-17
lines changed

3 files changed

+46
-17
lines changed

crates/typos-vars/codegen/src/main.rs

+16
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
7878

7979
let mut smallest = usize::MAX;
8080
let mut largest = usize::MIN;
81+
let mut no_invalid = true;
8182

8283
writeln!(
8384
file,
@@ -97,6 +98,8 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
9798
builder.entry(unicase::UniCase::new(word), &value);
9899
smallest = std::cmp::min(smallest, word.len());
99100
largest = std::cmp::max(largest, word.len());
101+
102+
no_invalid &= !is_always_invalid(data);
100103
}
101104
let codegenned = builder.build();
102105
writeln!(file, "{}", codegenned).unwrap();
@@ -110,6 +113,10 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
110113
)
111114
.unwrap();
112115

116+
writeln!(file).unwrap();
117+
writeln!(file, "pub const NO_INVALID: bool = {:?};", no_invalid,).unwrap();
118+
119+
writeln!(file).unwrap();
113120
for (symbol, entry) in entries.iter() {
114121
if !referenced_symbols.contains(symbol.as_str()) {
115122
continue;
@@ -156,6 +163,15 @@ fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool {
156163
false
157164
}
158165

166+
fn is_always_invalid(data: &[(&str, varcon::CategorySet)]) -> bool {
167+
for (_symbol, set) in data.iter() {
168+
if set.is_empty() {
169+
return true;
170+
}
171+
}
172+
false
173+
}
174+
159175
fn entries() -> BTreeMap<String, varcon_core::Entry> {
160176
varcon::VARCON
161177
.iter()

crates/typos-vars/src/vars_codegen.rs

+3
Original file line numberDiff line numberDiff line change
@@ -113083,6 +113083,9 @@ pub static VARS_DICTIONARY: phf::Map<
113083113083
};
113084113084

113085113085
pub const WORD_RANGE: std::ops::RangeInclusive<usize> = 2..=24;
113086+
113087+
pub const NO_INVALID: bool = true;
113088+
113086113089
pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap =
113087113090
[&["abettors"], &["abetters"], &["abettors"], &["abetters"]];
113088113091

src/dict.rs

+27-17
Original file line numberDiff line numberDiff line change
@@ -72,33 +72,43 @@ impl BuiltIn {
7272
#[cfg(feature = "vars")]
7373
impl BuiltIn {
7474
fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> {
75-
let mut chained: Vec<_> = corrections
76-
.iter()
77-
.flat_map(|c| match self.correct_with_vars(c) {
78-
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
79-
Some(Status::Corrections(vars)) => vars,
80-
Some(Status::Invalid) => {
81-
unreachable!("correct_with_vars should always have valid suggestions")
82-
}
83-
})
84-
.collect();
85-
if chained.len() != 1 {
86-
chained.sort_unstable();
87-
chained.dedup();
75+
if self.is_vars_enabled() {
76+
let mut chained: Vec<_> = corrections
77+
.iter()
78+
.flat_map(|c| match self.correct_with_vars(c) {
79+
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
80+
Some(Status::Corrections(vars)) => vars,
81+
Some(Status::Invalid) => {
82+
unreachable!("correct_with_vars should always have valid suggestions")
83+
}
84+
})
85+
.collect();
86+
if chained.len() != 1 {
87+
chained.sort_unstable();
88+
chained.dedup();
89+
}
90+
debug_assert!(!chained.is_empty());
91+
Status::Corrections(chained)
92+
} else {
93+
Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect())
8894
}
89-
debug_assert!(!chained.is_empty());
90-
Status::Corrections(chained)
9195
}
9296

9397
fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
94-
if typos_vars::WORD_RANGE.contains(&word.len()) {
98+
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
9599
map_lookup(&typos_vars::VARS_DICTIONARY, word)
96100
.map(|variants| self.select_variant(variants))
97101
} else {
98102
None
99103
}
100104
}
101105

106+
fn is_vars_enabled(&self) -> bool {
107+
#![allow(clippy::assertions_on_constants)]
108+
debug_assert!(typos_vars::NO_INVALID);
109+
self.locale.is_some()
110+
}
111+
102112
fn select_variant(
103113
&self,
104114
vars: &'static [(u8, &'static typos_vars::VariantsMap)],
@@ -301,7 +311,7 @@ mod test {
301311
typos::tokens::Case::Lower,
302312
0,
303313
));
304-
assert_eq!(correction, Some(Status::Valid));
314+
assert_eq!(correction, None);
305315
}
306316

307317
#[cfg(feature = "vars")]

0 commit comments

Comments
 (0)