Skip to content

Commit 08efd38

Browse files
committed
Be strict about escape sequences in bytes
1 parent 27f180a commit 08efd38

File tree

2 files changed

+41
-6
lines changed

2 files changed

+41
-6
lines changed

lib/rule_engine/parser/__init__.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,28 @@
3434
import codecs
3535
import collections
3636
import types as pytypes
37+
import re
3738

3839
from .. import ast
3940
from .. import errors
4041
from .base import ParserBase
4142
from .utilities import timedelta_regex
4243

4344
literal_eval = pyast.literal_eval
45+
re
46+
def _repl_byte_escape(match):
47+
token = match.group(1)
48+
if token[0] == ord('x'): # x
49+
return bytes([int(token[1:], 16)])
50+
table = {
51+
b't': '\t'.encode(),
52+
b'n': '\n'.encode(),
53+
b'r': '\r'.encode(),
54+
b'"': '\"'.encode(),
55+
b"'": '\''.encode(),
56+
b'\\': '\\'.encode()
57+
}
58+
return table[token]
4459

4560
class _DeferredAstNode(object):
4661
__slots__ = ('cls', 'args', 'kwargs', 'method')
@@ -418,10 +433,17 @@ def p_expression_boolean(self, p):
418433

419434
def p_expression_bytes(self, p):
    'object : BYTES'
    # NOTE(review): the string above is presumably the grammar production read
    # by the parser generator from the docstring -- it must stay byte-identical,
    # which is why the documentation for this method lives in comments.
    #
    # Turns a BYTES token into a deferred BytesExpression node. p[1] is the
    # quoted literal text; only the escapes \\, \t, \n, \r, \", \' and \xHH are
    # accepted, anything else raises BytesSyntaxError.
    literal = p[1][1:-1]  # strip the surrounding quote characters
    try:
        # The unicode-escape codec rewrites non-ASCII / non-printable
        # characters as backslash escapes and doubles existing backslashes.
        value = codecs.encode(literal, 'unicode-escape').decode()
    except UnicodeError:
        raise errors.BytesSyntaxError('invalid bytes literal', literal) from None
    # Undo the backslash doubling so the escapes written by the user are back
    # in their original form.
    value = value.replace('\\\\', '\\')
    # Reject any unescaped backslash that does not start a supported escape.
    if (match := re.search(r'(?<!\\)(?:\\\\)*\\(?!\\|t|n|r|"|\'|x[0-9A-Fa-f]{2}).?', value)):
        raise errors.BytesSyntaxError(f"invalid bytes literal (invalid escape at position {match.start()})", literal)
    value = value.encode()
    # Decode every escape in ONE left-to-right pass. Decoding hex escapes in a
    # separate first pass is wrong: it is blind to escaped backslashes, so
    # r'\\x41' (a literal backslash followed by "x41") had its second backslash
    # consumed as the start of a hex escape, and a backslash produced by
    # r'\x5c' was re-interpreted as the start of another escape by the second
    # pass. A single alternation keeps each backslash tied to exactly one
    # escape.
    value = re.sub(br'\\(x[0-9A-Fa-f]{2}|.)', _repl_byte_escape, value)
    p[0] = _DeferredAstNode(ast.BytesExpression, args=(self.context, value))
426448

427449
def p_expression_datetime(self, p):

tests/parser.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -313,11 +313,24 @@ def test_parse_boolean(self):
313313

314314
def test_parse_bytes(self):
    # The empty bytes literal must parse to a BytesExpression holding b''.
    statement, expected = 'b""', b''
    self.assertLiteralStatementEqual(statement, ast.BytesExpression, expected)
316-
self.assertLiteralStatementEqual('b"dead\x13\x37"', ast.BytesExpression, b'dead\x13\x37')
317-
self.assertLiteralStatementEqual('b"\\xde\\xad"', ast.BytesExpression, b'\xde\xad')
318-
self.assertLiteralStatementEqual('b"\\xDE\\xAD"', ast.BytesExpression, b'\xde\xad')
316+
317+
def test_parse_bytes_escape(self):
    # Each (statement, expected) pair checks one supported single-character
    # escape; the statements are raw strings so the backslash reaches the
    # parser unmodified.
    cases = (
        ('b""', b''),
        (r'b"foo\tbar"', b'foo\x09bar'),
        (r'b"foo\nbar"', b'foo\x0abar'),
        (r'b"foo\rbar"', b'foo\x0dbar'),
        (r'b"foo\"bar"', b'foo\x22bar'),
        (r'b"foo\'bar"', b'foo\x27bar'),
        (r'b"foo\\bar"', b'foo\x5cbar'),
    )
    for statement, expected in cases:
        self.assertLiteralStatementEqual(statement, ast.BytesExpression, expected)
    # A \u escape is not accepted inside a bytes literal.
    with self.assertRaises(errors.BytesSyntaxError):
        self._parse(r'b"\u0123"', self.context)
327+
328+
def test_parse_bytes_escape_hexl(self):
    # Hex escapes must decode to the corresponding byte, regardless of the
    # case of the hex digits.
    cases = (
        (r'b"dead\x13\x37"', b'dead\x13\x37'),
        (r'b"\xde\xad"', b'\xde\xad'),
        (r'b"\xDE\xAD"', b'\xde\xad'),
    )
    for statement, expected in cases:
        self.assertLiteralStatementEqual(statement, ast.BytesExpression, expected)
    # \x followed by non-hex characters is a syntax error.
    with self.assertRaises(errors.BytesSyntaxError):
        self._parse(r'b"\xyz"', self.context)
321334

322335
def test_parse_datetime(self):
323336
self.assertLiteralStatementEqual('d"2016-10-15"', ast.DatetimeExpression, datetime.datetime(2016, 10, 15, tzinfo=dateutil.tz.tzlocal()))

0 commit comments

Comments
 (0)