Skip to content

Commit a00831c

Browse files
author
Ed Page
committed
fix: Ignore numbers as identifiers
1 parent cc4b53a commit a00831c

File tree

1 file changed

+23
-4
lines changed

1 file changed

+23
-4
lines changed

typos/src/tokens.rs

+23-4
Original file line numberDiff line numberDiff line change
@@ -78,23 +78,33 @@ impl Parser {
7878
}
7979

8080
pub fn parse<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
81-
let ignore_hex = self.ignore_hex;
8281
self.words_str
8382
.find_iter(content)
84-
.filter(move |m| !ignore_hex || !is_hex(m.as_str().as_bytes()))
83+
.filter(move |m| self.accept(m.as_str().as_bytes()))
8584
.map(|m| Identifier::new_unchecked(m.as_str(), m.start()))
8685
}
8786

8887
pub fn parse_bytes<'c>(&'c self, content: &'c [u8]) -> impl Iterator<Item = Identifier<'c>> {
89-
let ignore_hex = self.ignore_hex;
9088
self.words_bytes
9189
.find_iter(content)
92-
.filter(move |m| !ignore_hex || !is_hex(m.as_bytes()))
90+
.filter(move |m| self.accept(m.as_bytes()))
9391
.filter_map(|m| {
9492
let s = std::str::from_utf8(m.as_bytes()).ok();
9593
s.map(|s| Identifier::new_unchecked(s, m.start()))
9694
})
9795
}
96+
97+
fn accept(&self, contents: &[u8]) -> bool {
98+
if is_number(contents) {
99+
return false;
100+
};
101+
102+
if self.ignore_hex {
103+
return !is_hex(contents);
104+
}
105+
106+
true
107+
}
98108
}
99109

100110
impl Default for Parser {
@@ -103,6 +113,15 @@ impl Default for Parser {
103113
}
104114
}
105115

116+
/// Reports whether `ident` looks like a plain decimal number literal.
///
/// A token counts as a number when it is non-empty and every byte is an
/// ASCII digit or a digit separator:
///
/// - `_`: number literal separator in Rust and other languages
/// - `'`: number literal separator in C++
///
/// This is a direct byte scan rather than a lazily-compiled regex: the check
/// is a single ASCII character class, and it runs on every token, so there
/// is no reason to pay for a regex engine or a lazy static here.
fn is_number(ident: &[u8]) -> bool {
    // An empty token is not a number (the character class must match at
    // least once).
    !ident.is_empty()
        && ident
            .iter()
            .all(|b| b.is_ascii_digit() || *b == b'_' || *b == b'\'')
}
124+
106125
fn is_hex(ident: &[u8]) -> bool {
107126
lazy_static::lazy_static! {
108127
// `_`: number literal separator in Rust and other languages

0 commit comments

Comments
 (0)