Skip to content

Commit

Permalink
Merge pull request #118 from CybercentreCanada/windows_path
Browse files Browse the repository at this point in the history
Handle more types of windows paths
  • Loading branch information
cccs-jh authored Mar 7, 2025
2 parents f5da338 + 22c5a97 commit 13e1c1b
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 41 deletions.
9 changes: 7 additions & 2 deletions src/multidecoder/decoders/filename.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,22 @@
if TYPE_CHECKING:
from multidecoder.node import Node

# Node type labels used for executable and library file names.
EXECUTABLE_TYPE = "executable.filename"
LIBRARY_TYPE = "executable.library.filename"

# Maps a lower-case file extension (including the dot) to its node type label.
EXT_MAP = {b".dll": LIBRARY_TYPE, b".exe": EXECUTABLE_TYPE}

# Case-insensitive patterns: a run of word characters followed by ".exe" / ".dll",
# delimited by word boundaries on both sides.
EXECUTABLE_RE = rb"(?i)\b\w+[.]exe\b"
LIBRARY_RE = rb"(?i)\b\w+[.]dll\b"


@decoder
def find_executable_name(data: bytes) -> list[Node]:
    """Scan *data* with ``EXECUTABLE_RE`` and label the hits ``EXECUTABLE_TYPE``."""
    hits = regex_hits(EXECUTABLE_TYPE, EXECUTABLE_RE, data)
    return hits


@decoder
def find_library(data: bytes) -> list[Node]:
    """Scan *data* with ``LIBRARY_RE`` and label the hits ``LIBRARY_TYPE``.

    DLL names must carry the library label ("executable.library.filename"),
    not the executable one, so that they stay consistent with ``EXT_MAP``.
    """
    return regex_hits(LIBRARY_TYPE, LIBRARY_RE, data)
57 changes: 51 additions & 6 deletions src/multidecoder/decoders/path.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
from __future__ import annotations

from typing import TYPE_CHECKING
import ntpath

import regex as re

from multidecoder.decoders.filename import EXT_MAP
from multidecoder.decoders.network import is_domain, parse_ip
from multidecoder.hit import regex_hits
from multidecoder.node import Node
from multidecoder.registry import decoder

if TYPE_CHECKING:
from multidecoder.node import Node

# Posix style paths: optional "." or ".." prefix, one or more directory
# segments of at least three word characters, then a file name.
PATH_RE = rb"[.]?[.]?/(\w{3,}/)+[\w.]{3,}"

# Windows Paths
# See https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats
#
# The admin-share part of the UNC prefix uses "[$]" because a bare "$" is an
# end-of-input anchor and would never match a literal dollar sign.
#
# NOTE(review): the bare "." / ".." alternatives in the segment pattern are
# regex wildcards, not literal dots, so they also accept any one- or two-byte
# segment (a two-letter directory such as "to\", or an extra backslash).
# Existing expectations around repeated separators appear to rely on this;
# confirm before escaping them.
WINDOWS_PATH_RE = (
    rb"(?i)(?:\\\\[.?]\\(?:[a-z]:\\|UNC\\|Volume\{[a-z0-9-]{36}\}\\)?"  # DOS device path
    rb"|\\\\[\w.-]+(?:@SSL)?(?:@\d{,5})?\\(?:[a-z][$]\\)?"  # UNC path
    rb"|[a-z]:\\?|\\)?"  # absolute or drive relative path
    rb"(?:(?:.|..|[\w.-]{3,})\\)+"  # path segments
    rb"[\w.-]{3,}"  # filename
)


@decoder
Expand All @@ -19,4 +31,37 @@ def find_path(data: bytes) -> list[Node]:

def _hostname_children(hostname: bytes, offset: int) -> list[Node]:
    """Build the child node for the host part of a UNC-style path.

    Returns a one-element list holding the node produced by ``parse_ip``
    (shifted by *offset*) or, when that raises ``ValueError``, a
    "network.domain" node if ``is_domain`` accepts the name; otherwise an
    empty list.
    """
    try:
        return [parse_ip(hostname).shift(offset)]
    except ValueError:
        if is_domain(hostname):
            return [Node("network.domain", hostname, "", offset, offset + len(hostname))]
    return []


@decoder
def find_windows_path(data: bytes) -> list[Node]:
    """Find Windows paths in *data* and classify them.

    Every ``WINDOWS_PATH_RE`` match is normalized with ``ntpath.normpath``.
    The resulting node is typed "windows.device.path", "windows.unc.path" or
    "windows.path" depending on its prefix, and is tagged with the
    "windows.dotpath" obfuscation when normalization shortened it. Child
    nodes are added for a UNC host (IP or domain) and for a file name that
    has an extension; child offsets are relative to the normalized path,
    while the node span is that of the original match.
    """
    output = []
    for match in re.finditer(WINDOWS_PATH_RE, data):
        raw = match.group()
        path = ntpath.normpath(raw)
        obfuscation = "windows.dotpath" if len(path) < len(raw) else ""
        children = []
        segments = path.split(b"\\")
        if path.startswith((Rb"\\.", Rb"\\?")):
            path_type = "windows.device.path"
            # Short matches (e.g. b"\\\\.abc") may have fewer than five
            # segments after normalization, so check before indexing.
            if len(segments) > 4 and segments[3].upper() == b"UNC":
                hostname = segments[4].split(b"@", maxsplit=1)[0]
                # 8 == len(b"\\\\?\\UNC\\"), the prefix preceding the host.
                children.extend(_hostname_children(hostname, 8))
        elif path.startswith(Rb"\\"):
            path_type = "windows.unc.path"
            hostname = segments[2].split(b"@", maxsplit=1)[0]
            # 2 == len(b"\\\\"), the prefix preceding the host.
            children.extend(_hostname_children(hostname, 2))
        else:
            path_type = "windows.path"
        filename = segments[-1]
        extension = ntpath.splitext(filename)[1]
        if extension:
            type_ = EXT_MAP.get(extension.lower(), "filename")
            children.append(Node(type_, filename, "", len(path) - len(filename), len(path)))
        output.append(Node(path_type, path, obfuscation, *match.span(), children=children))
    return output
161 changes: 129 additions & 32 deletions tests/test_decoders/test_path.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pytest
import regex as re

from multidecoder.decoders.path import (
Expand All @@ -6,47 +7,143 @@
find_path,
find_windows_path,
)
from multidecoder.node import Node


def test_empty_path():
    """PATH_RE finds nothing in an empty buffer."""
    assert re.search(PATH_RE, b"") is None


def test_empty_windows():
    """WINDOWS_PATH_RE finds nothing in an empty buffer."""
    assert re.search(WINDOWS_PATH_RE, b"") is None


def test_absolute_path():
    """PATH_RE matches a path that starts at the root."""
    assert re.match(PATH_RE, b"/path/file.txt") is not None


def test_absolute_windows_path():
    """WINDOWS_PATH_RE matches a path that starts with a single backslash."""
    assert re.match(WINDOWS_PATH_RE, b"\\path\\file.txt") is not None


def test_dot_path():
    """PATH_RE matches a path relative to the current directory."""
    assert re.match(PATH_RE, b"./path/file.txt") is not None


def test_dot_windows_path():
    """WINDOWS_PATH_RE matches a path relative to the current directory."""
    assert re.match(WINDOWS_PATH_RE, b".\\path\\file.txt") is not None


def test_dotdot_path():
    """PATH_RE matches a path relative to the parent directory."""
    assert re.match(PATH_RE, b"../path/file.txt") is not None


def test_dotdot_windows_path():
    """WINDOWS_PATH_RE matches a path relative to the parent directory."""
    assert re.match(WINDOWS_PATH_RE, b"..\\path\\file.txt") is not None


def test_windows_drive_path():
    """WINDOWS_PATH_RE matches a path that starts with a drive letter."""
    assert re.match(WINDOWS_PATH_RE, b"C:\\path\\file.txt") is not None
@pytest.mark.parametrize(
    "path",
    [
        b"/path/file.txt",
        b"./path/file.txt",
        b"../path/file.txt",
    ],
)
def test_path_re(path):
    """PATH_RE matches absolute, dot-relative and dotdot-relative paths."""
    assert re.match(PATH_RE, path) is not None


def test_find_path():
    """find_path reports at least one hit for a simple absolute path."""
    hits = find_path(b"/path/file.txt")
    assert hits


def test_find_windows_path():
    """find_windows_path reports at least one hit for a rooted path."""
    hits = find_windows_path(b"\\path\\file.txt")
    assert hits
@pytest.mark.parametrize(
    "path",
    [
        # https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats
        Rb"C:\Documents\Newsletters\Summer2018.pdf",
        # Rb"\Program Files\Custom Utilities\StringFinder.exe" TODO: find way to support spaces without false positives
        Rb"2018\January.xlsx",
        Rb"..\Publications\TravelBrochure.pdf",
        Rb"C:\Projects\apilibrary\apilibrary.sln",
        Rb"C:Projects\apilibrary\apilibrary.sln",
        Rb"\\system07\C$\Test\Foo.txt",
        Rb"\\Server2\Share\Test\Foo.txt",
        Rb"\\.\C:\Test\Foo.txt",
        Rb"\\?\C:\Test\Foo.txt",
        Rb"\\.\Volume{b75e2c83-0000-0000-0000-602f00000000}\Test\Foo.txt",
        Rb"\\?\Volume{b75e2c83-0000-0000-0000-602f00000000}\Test\Foo.txt",
        Rb"\\.\UNC\Server\Share\Test\Foo.txt",
        Rb"\\?\UNC\Server\Share\Test\Foo.txt",
        Rb"c:\temp\test-file.txt",
        Rb"\\127.0.0.1\c$\temp\test-file.txt",
        Rb"\\LOCALHOST\c$\temp\test-file.txt",
        Rb"\\.\c:\temp\test-file.txt",
        Rb"\\?\c:\temp\test-file.txt",
        Rb"\\.\UNC\LOCALHOST\c$\temp\test-file.txt",
        # Additional tests
        Rb"\\some-domain.com@SSL\SERVER\file",
        Rb"\temp\test-file.txt",
        Rb".\temp\test-file.txt",
        Rb"..\temp\test-file.txt",
    ],
)
def test_windows_path_re(path):
    """WINDOWS_PATH_RE matches each sample path in full, not just a fragment."""
    found = re.search(WINDOWS_PATH_RE, path)
    assert found.group() == path


@pytest.mark.parametrize("fpos", [b""])
def test_windows_path_re_fpos(fpos):
    """WINDOWS_PATH_RE must not match inputs known to be false positives."""
    assert re.search(WINDOWS_PATH_RE, fpos) is None


@pytest.mark.parametrize(
    ("path", "result"),
    [
        # Plain drive path: only a generic filename child.
        (
            Rb"c:\temp\test-file.txt",
            [
                Node(
                    "windows.path",
                    Rb"c:\temp\test-file.txt",
                    "",
                    0,
                    21,
                    children=[Node("filename", b"test-file.txt", "", 8, 21)],
                )
            ],
        ),
        # UNC path with an IP host: IP child plus filename child.
        (
            Rb"\\127.0.0.1\c$\temp\test-file.txt",
            [
                Node(
                    "windows.unc.path",
                    Rb"\\127.0.0.1\c$\temp\test-file.txt",
                    "",
                    0,
                    33,
                    children=[
                        Node("network.ip", b"127.0.0.1", "", 2, 11),
                        Node("filename", b"test-file.txt", "", 20, 33),
                    ],
                )
            ],
        ),
        # UNC path with a domain host and "@SSL" suffix; no extension, so no filename child.
        (
            Rb"\\some-domain.com@SSL\SERVER\file",
            [
                Node(
                    "windows.unc.path",
                    Rb"\\some-domain.com@SSL\SERVER\file",
                    "",
                    0,
                    33,
                    children=[Node("network.domain", Rb"some-domain.com", "", 2, 17)],
                )
            ],
        ),
        # Device path wrapping a UNC host; ".exe" maps to the executable type.
        (
            Rb"\\?\UNC\127.0.0.1\path\to\file.exe",
            [
                Node(
                    "windows.device.path",
                    Rb"\\?\UNC\127.0.0.1\path\to\file.exe",
                    "",
                    0,
                    34,
                    children=[
                        Node("network.ip", b"127.0.0.1", "", 8, 17),
                        Node("executable.filename", b"file.exe", "", 26, 34),
                    ],
                ),
            ],
        ),
        # Redundant separators and dot segments are normalized away and flagged.
        (
            Rb"c:\temp\\\\\\foo\..\\.\.\\test-file",
            [
                Node(
                    "windows.path",
                    Rb"c:\temp\test-file",
                    "windows.dotpath",
                    0,
                    35,
                )
            ],
        ),
    ],
)
def test_find_windows_path(path, result):
    """find_windows_path yields the expected node tree for each sample path."""
    found = find_windows_path(path)
    assert found == result
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ envlist = py38,py39

[testenv]
deps = pytest
commands = pytest
commands = pytest {posargs}

0 comments on commit 13e1c1b

Please sign in to comment.