Skip to content

Commit fe5dde8

Browse files
authored
Add Lexer for NDISASM (#933)
The Netwide Disassembler outputs text that is assembly prefixed with memory offsets and opcode bytes.
1 parent 6dd9f26 commit fe5dde8

File tree

3 files changed

+243
-0
lines changed

3 files changed

+243
-0
lines changed

lexers/embedded/ndisasm.xml

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
<lexer>
2+
<config>
  <!-- Lexer identity: display name, lookup alias and MIME type. -->
  <name>NDISASM</name>
  <alias>ndisasm</alias>
  <mime_type>text/x-disasm</mime_type>
  <!-- The rule patterns are written in lower case and rely on this flag to
       also accept upper-case input. -->
  <case_insensitive>true</case_insensitive>
  <priority>0.5</priority> <!-- Lower than NASM -->
</config>
9+
<rules>
10+
<state name="root">
  <!-- Start of a disassembly line: the leading alphanumeric run (the offset
       column, e.g. "00000000") is emitted as CommentSpecial, then the
       "offset" state takes over for the remainder of the line. -->
  <rule pattern="^[0-9A-Za-z]+">
    <token type="CommentSpecial"/>
    <push state="offset"/>
  </rule>
</state>
16+
<state name="offset">
  <!-- Entered once the leading offset has been consumed by "root". The next
       alphanumeric run (the opcode bytes column, e.g. "B013") is emitted as
       CommentSpecial and the "assembly" state is pushed. -->
  <rule pattern="[0-9A-Za-z]+">
    <token type="CommentSpecial"/>
    <push state="assembly"/>
  </rule>
  <!-- Blanks, comments and newlines between the two columns. -->
  <rule>
    <include state="whitespace"/>
  </rule>
</state>
25+
<state name="punctuation">
  <!-- Operand-level symbols and keywords; pulled into "instruction-args"
       through an include rule. -->

  <!-- Separators and brackets: comma, parentheses, colon, square brackets. -->
  <rule pattern="[,():\[\]]+">
    <token type="Punctuation"/>
  </rule>
  <!-- Runs of operator characters. -->
  <rule pattern="[&amp;|^&lt;&gt;+*/%~-]+">
    <token type="Operator"/>
  </rule>
  <!-- One or more dollar signs. -->
  <rule pattern="[$]+">
    <token type="KeywordConstant"/>
  </rule>
  <!-- Word operators. -->
  <rule pattern="seg|wrt|strict">
    <token type="OperatorWord"/>
  </rule>
  <!-- Size keywords: byte, word, dword, qword. -->
  <rule pattern="byte|[dq]?word">
    <token type="KeywordType"/>
  </rule>
</state>
42+
<state name="assembly">
  <!-- Entered after the offset and opcode-byte columns. Classifies the first
       word of the instruction text and, for everything except labels, hands
       the rest of the line to "instruction-args". -->
  <rule>
    <include state="whitespace"/>
  </rule>
  <!-- Identifier directly followed by a colon. -->
  <rule pattern="[a-z$._?][\w$.?#@~]*:">
    <token type="NameLabel"/>
  </rule>
  <!-- "name equ": three capture groups (name, whitespace, "equ").
       NOTE(review): the whitespace group is typed KeywordDeclaration as well. -->
  <rule pattern="([a-z$._?][\w$.?#@~]*)(\s+)(equ)">
    <bygroups>
      <token type="NameConstant"/>
      <token type="KeywordDeclaration"/>
      <token type="KeywordDeclaration"/>
    </bygroups>
    <push state="instruction-args"/>
  </rule>
  <!-- Directive keywords. -->
  <rule pattern="BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|EXPORT|LIBRARY|MODULE">
    <token type="Keyword"/>
    <push state="instruction-args"/>
  </rule>
  <!-- Data declarations: db/dw/dd/dq/dt, resb/resw/resd/resq/rest, times. -->
  <rule pattern="(?:res|d)[bwdqt]|times">
    <token type="KeywordDeclaration"/>
    <push state="instruction-args"/>
  </rule>
  <!-- Any other identifier is treated as the mnemonic (e.g. "mov", "push"). -->
  <rule pattern="[a-z$._?][\w$.?#@~]*">
    <token type="NameFunction"/>
    <push state="instruction-args"/>
  </rule>
  <!-- Line break: pop "assembly" and "offset", leaving "root" on the stack. -->
  <rule pattern="[\r\n]+">
    <token type="Text"/>
    <pop depth="2"/>
  </rule>
</state>
74+
<state name="instruction-args">
  <!-- Operands that follow a mnemonic, directive or declaration. Rule order
       matters: numeric literals are tried before punctuation, registers
       before generic identifiers. -->

  <!-- String literals in double quotes, single quotes or backticks. -->
  <rule pattern="&#34;(\\&#34;|[^&#34;\n])*&#34;|&#39;(\\&#39;|[^&#39;\n])*&#39;|`(\\`|[^`\n])*`">
    <token type="LiteralString"/>
  </rule>
  <!-- Hex literals: "0x" prefix, "$0" prefix, or trailing "h". The dollar
       sign is escaped so it matches a literal "$"; left unescaped it is an
       end-of-line anchor and that alternative could never match. -->
  <rule pattern="(?:0x[0-9a-f]+|\$0[0-9a-f]*|[0-9]+[0-9a-f]*h)">
    <token type="LiteralNumberHex"/>
  </rule>
  <!-- Octal literals with trailing "q". -->
  <rule pattern="[0-7]+q">
    <token type="LiteralNumberOct"/>
  </rule>
  <!-- Binary literals with trailing "b". -->
  <rule pattern="[01]+b">
    <token type="LiteralNumberBin"/>
  </rule>
  <rule pattern="[0-9]+\.e?[0-9]+">
    <token type="LiteralNumberFloat"/>
  </rule>
  <rule pattern="[0-9]+">
    <token type="LiteralNumberInteger"/>
  </rule>
  <rule>
    <include state="punctuation"/>
  </rule>
  <!-- Register names. -->
  <rule pattern="r[0-9][0-5]?[bwd]|[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]">
    <token type="NameBuiltin"/>
  </rule>
  <!-- Any remaining identifier (e.g. "stosb" after "rep"). -->
  <rule pattern="[a-z$._?][\w$.?#@~]*">
    <token type="NameVariable"/>
  </rule>
  <!-- Line break: pop "instruction-args", "assembly" and "offset", leaving
       "root" on the stack. Listed before the whitespace include so this rule
       handles newlines in this state. -->
  <rule pattern="[\r\n]+">
    <token type="Text"/>
    <pop depth="3"/>
  </rule>
  <rule>
    <include state="whitespace"/>
  </rule>
</state>
110+
<state name="whitespace">
  <!-- Shared by "offset", "assembly" and "instruction-args" via include. -->

  <!-- Newline: emitted as Text and two states are popped.
       NOTE(review): when reached through "offset" only "root" and "offset"
       are on the stack, so this pop removes "root" too; confirm intended. -->
  <rule pattern="\n">
    <token type="Text"/>
    <pop depth="2"/>
  </rule>
  <!-- Spaces and tabs. -->
  <rule pattern="[ \t]+">
    <token type="Text"/>
  </rule>
  <!-- Semicolon comment running to the end of the line. -->
  <rule pattern=";.*">
    <token type="CommentSingle"/>
  </rule>
</state>
122+
</rules>
123+
</lexer>

lexers/testdata/ndisasm.actual

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
00000000 B013 mov al,0x13
2+
00000002 CD10 int 0x10
3+
00000004 B9027D mov cx,0x7d02
4+
00000007 51 push cx
5+
00000008 6800A0 push word 0xa000
6+
0000000B 07 pop es
7+
0000000C B00F mov al,0xf
8+
0000000E F3AA rep stosb
9+
00000010 59 pop cx
10+
00000011 B004 mov al,0x4
11+
00000013 F3AA rep stosb
12+
00000015 CD16 int 0x16

lexers/testdata/ndisasm.expected

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
[
2+
{"type":"CommentSpecial","value":"00000000"},
3+
{"type":"Text","value":" "},
4+
{"type":"CommentSpecial","value":"B013"},
5+
{"type":"Text","value":" "},
6+
{"type":"NameFunction","value":"mov"},
7+
{"type":"Text","value":" "},
8+
{"type":"NameBuiltin","value":"al"},
9+
{"type":"Punctuation","value":","},
10+
{"type":"LiteralNumberHex","value":"0x13"},
11+
{"type":"Text","value":"\n"},
12+
{"type":"CommentSpecial","value":"00000002"},
13+
{"type":"Text","value":" "},
14+
{"type":"CommentSpecial","value":"CD10"},
15+
{"type":"Text","value":" "},
16+
{"type":"NameFunction","value":"int"},
17+
{"type":"Text","value":" "},
18+
{"type":"LiteralNumberHex","value":"0x10"},
19+
{"type":"Text","value":"\n"},
20+
{"type":"CommentSpecial","value":"00000004"},
21+
{"type":"Text","value":" "},
22+
{"type":"CommentSpecial","value":"B9027D"},
23+
{"type":"Text","value":" "},
24+
{"type":"NameFunction","value":"mov"},
25+
{"type":"Text","value":" "},
26+
{"type":"NameBuiltin","value":"cx"},
27+
{"type":"Punctuation","value":","},
28+
{"type":"LiteralNumberHex","value":"0x7d02"},
29+
{"type":"Text","value":"\n"},
30+
{"type":"CommentSpecial","value":"00000007"},
31+
{"type":"Text","value":" "},
32+
{"type":"CommentSpecial","value":"51"},
33+
{"type":"Text","value":" "},
34+
{"type":"NameFunction","value":"push"},
35+
{"type":"Text","value":" "},
36+
{"type":"NameBuiltin","value":"cx"},
37+
{"type":"Text","value":"\n"},
38+
{"type":"CommentSpecial","value":"00000008"},
39+
{"type":"Text","value":" "},
40+
{"type":"CommentSpecial","value":"6800A0"},
41+
{"type":"Text","value":" "},
42+
{"type":"NameFunction","value":"push"},
43+
{"type":"Text","value":" "},
44+
{"type":"KeywordType","value":"word"},
45+
{"type":"Text","value":" "},
46+
{"type":"LiteralNumberHex","value":"0xa000"},
47+
{"type":"Text","value":"\n"},
48+
{"type":"CommentSpecial","value":"0000000B"},
49+
{"type":"Text","value":" "},
50+
{"type":"CommentSpecial","value":"07"},
51+
{"type":"Text","value":" "},
52+
{"type":"NameFunction","value":"pop"},
53+
{"type":"Text","value":" "},
54+
{"type":"NameBuiltin","value":"es"},
55+
{"type":"Text","value":"\n"},
56+
{"type":"CommentSpecial","value":"0000000C"},
57+
{"type":"Text","value":" "},
58+
{"type":"CommentSpecial","value":"B00F"},
59+
{"type":"Text","value":" "},
60+
{"type":"NameFunction","value":"mov"},
61+
{"type":"Text","value":" "},
62+
{"type":"NameBuiltin","value":"al"},
63+
{"type":"Punctuation","value":","},
64+
{"type":"LiteralNumberHex","value":"0xf"},
65+
{"type":"Text","value":"\n"},
66+
{"type":"CommentSpecial","value":"0000000E"},
67+
{"type":"Text","value":" "},
68+
{"type":"CommentSpecial","value":"F3AA"},
69+
{"type":"Text","value":" "},
70+
{"type":"NameFunction","value":"rep"},
71+
{"type":"Text","value":" "},
72+
{"type":"NameVariable","value":"stosb"},
73+
{"type":"Text","value":"\n"},
74+
{"type":"CommentSpecial","value":"00000010"},
75+
{"type":"Text","value":" "},
76+
{"type":"CommentSpecial","value":"59"},
77+
{"type":"Text","value":" "},
78+
{"type":"NameFunction","value":"pop"},
79+
{"type":"Text","value":" "},
80+
{"type":"NameBuiltin","value":"cx"},
81+
{"type":"Text","value":"\n"},
82+
{"type":"CommentSpecial","value":"00000011"},
83+
{"type":"Text","value":" "},
84+
{"type":"CommentSpecial","value":"B004"},
85+
{"type":"Text","value":" "},
86+
{"type":"NameFunction","value":"mov"},
87+
{"type":"Text","value":" "},
88+
{"type":"NameBuiltin","value":"al"},
89+
{"type":"Punctuation","value":","},
90+
{"type":"LiteralNumberHex","value":"0x4"},
91+
{"type":"Text","value":"\n"},
92+
{"type":"CommentSpecial","value":"00000013"},
93+
{"type":"Text","value":" "},
94+
{"type":"CommentSpecial","value":"F3AA"},
95+
{"type":"Text","value":" "},
96+
{"type":"NameFunction","value":"rep"},
97+
{"type":"Text","value":" "},
98+
{"type":"NameVariable","value":"stosb"},
99+
{"type":"Text","value":"\n"},
100+
{"type":"CommentSpecial","value":"00000015"},
101+
{"type":"Text","value":" "},
102+
{"type":"CommentSpecial","value":"CD16"},
103+
{"type":"Text","value":" "},
104+
{"type":"NameFunction","value":"int"},
105+
{"type":"Text","value":" "},
106+
{"type":"LiteralNumberHex","value":"0x16"},
107+
{"type":"Text","value":"\n"}
108+
]

0 commit comments

Comments
 (0)