@@ -78,23 +78,33 @@ impl Parser {
78
78
}
79
79
80
80
pub fn parse < ' c > ( & ' c self , content : & ' c str ) -> impl Iterator < Item = Identifier < ' c > > {
81
- let ignore_hex = self . ignore_hex ;
82
81
self . words_str
83
82
. find_iter ( content)
84
- . filter ( move |m| !ignore_hex || ! is_hex ( m. as_str ( ) . as_bytes ( ) ) )
83
+ . filter ( move |m| self . accept ( m. as_str ( ) . as_bytes ( ) ) )
85
84
. map ( |m| Identifier :: new_unchecked ( m. as_str ( ) , m. start ( ) ) )
86
85
}
87
86
88
87
pub fn parse_bytes < ' c > ( & ' c self , content : & ' c [ u8 ] ) -> impl Iterator < Item = Identifier < ' c > > {
89
- let ignore_hex = self . ignore_hex ;
90
88
self . words_bytes
91
89
. find_iter ( content)
92
- . filter ( move |m| !ignore_hex || ! is_hex ( m. as_bytes ( ) ) )
90
+ . filter ( move |m| self . accept ( m. as_bytes ( ) ) )
93
91
. filter_map ( |m| {
94
92
let s = std:: str:: from_utf8 ( m. as_bytes ( ) ) . ok ( ) ;
95
93
s. map ( |s| Identifier :: new_unchecked ( s, m. start ( ) ) )
96
94
} )
97
95
}
96
+
97
+ fn accept ( & self , contents : & [ u8 ] ) -> bool {
98
+ if is_number ( contents) {
99
+ return false ;
100
+ } ;
101
+
102
+ if self . ignore_hex {
103
+ return !is_hex ( contents) ;
104
+ }
105
+
106
+ true
107
+ }
98
108
}
99
109
100
110
impl Default for Parser {
@@ -103,6 +113,15 @@ impl Default for Parser {
103
113
}
104
114
}
105
115
116
/// Reports whether `ident` consists solely of ASCII decimal digits and
/// number-literal separators.
///
/// Accepted bytes:
/// - `0`-`9`
/// - `_`: number literal separator in Rust and other languages
/// - `'`: number literal separator in C++
///
/// Returns `false` for empty input. A token made only of separators (for
/// example `_`) returns `true`; this is exactly what the pattern
/// `^[0-9_']+$` accepts, and that behavior is kept unchanged.
fn is_number(ident: &[u8]) -> bool {
    // A fixed three-member byte class needs no regex: a plain scan gives the
    // same answer without a lazily-initialized global or regex-engine setup.
    !ident.is_empty()
        && ident
            .iter()
            .all(|&byte| byte.is_ascii_digit() || byte == b'_' || byte == b'\'')
}
124
+
106
125
fn is_hex ( ident : & [ u8 ] ) -> bool {
107
126
lazy_static:: lazy_static! {
108
127
// `_`: number literal separator in Rust and other languages
0 commit comments