Skip to content

Commit

Permalink
Merge pull request #125 from CybercentreCanada/update/ioc-extraction-update
Browse files Browse the repository at this point in the history

Rework when enforce_char_min gets applied
  • Loading branch information
cccs-kevin authored Jan 9, 2024
2 parents 2f63d43 + 9b1d8be commit 69ede8d
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 13 deletions.
18 changes: 8 additions & 10 deletions assemblyline_service_utilities/common/dynamic_service_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
# Choosing an arbitrary number, based on https://webmasters.stackexchange.com/questions/16996/maximum-domain-name-length
MAX_DOMAIN_CHARS = 100
MIN_URI_CHARS = 11
MIN_URI_PATH_CHARS = 4

# There are samples that inject themselves for the entire analysis time
# and have the potential to exceed depths of 1000. Also, the assumption with 10 is that no process
Expand Down Expand Up @@ -3150,13 +3149,13 @@ def extract_iocs_from_text_blob(
elif dumps({"ioc_type": "ip", "ioc": ip}) not in result_section.section_body.body:
result_section.add_row(TableRow(ioc_type="ip", ioc=ip))
for domain in sorted(domains):
if enforce_char_min and len(domain) < MIN_DOMAIN_CHARS:
continue
if enforce_domain_char_max and len(domain) > MAX_DOMAIN_CHARS:
continue

# Check if the domain ends with a TLD that is frequently a false positive
if any(domain.lower().endswith(tld) for tld in COMMON_FP_TLDS):
# If a domain matches any of the following criteria, only keep it when it is the hostname of a URI;
# otherwise, we don't want it
if (
any(domain.lower().endswith(tld) for tld in COMMON_FP_TLDS)
or (enforce_char_min and len(domain) < MIN_DOMAIN_CHARS)
or (enforce_domain_char_max and len(domain) > MAX_DOMAIN_CHARS)
):
is_domain_present_in_uri = False
for uri in uris:
parsed_uri = urlparse(uri.lower())
Expand Down Expand Up @@ -3211,8 +3210,7 @@ def extract_iocs_from_text_blob(
if "//" in uri:
uri = uri.split("//")[1]
for uri_path in findall(URI_PATH, uri):
if enforce_char_min and len(uri_path) < MIN_URI_PATH_CHARS:
continue
# The URI is already valid at this point, so tag the URI path regardless of its length
if add_tag(
result_section,
f"network.{network_tag_type}.uri_path",
Expand Down
24 changes: 21 additions & 3 deletions test/test_dynamic_service_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4399,7 +4399,11 @@ def test_load_from_json():
"sha256": "blah",
}
],
{"extracted": [{'path': 'blah', 'name': 'blah', 'description': 'blah', 'parent_relation': 'EXTRACTED'}]},
{
"extracted": [
{"path": "blah", "name": "blah", "description": "blah", "parent_relation": "EXTRACTED"}
]
},
),
(
[
Expand Down Expand Up @@ -4431,15 +4435,18 @@ def test_load_from_json():
"sub_sig_id": "hollowshunter_exe",
"sub_title": "HollowsHunter Injected Portable Executable",
"sub_body": "a",
"sub_tags": {"value": "123_hollowshunter/hh_process_12345_blah123.something.exe", "tag_type": "dynamic.process.file_name"},
"sub_tags": {
"value": "123_hollowshunter/hh_process_12345_blah123.something.exe",
"tag_type": "dynamic.process.file_name",
},
"extracted": [
{
"description": "blah",
"name": "123_hollowshunter/hh_process_12345_blah123.something.exe",
"parent_relation": "MEMDUMP",
"path": "blah",
},
]
],
},
),
# CAPE-specific memory dump handling
Expand Down Expand Up @@ -8624,6 +8631,17 @@ def test_preprocess_ontology():
},
[],
),
(
"blah https://microsoft.net/ blah",
True,
True,
{
"network.dynamic.domain": ["microsoft.net"],
"network.dynamic.uri": ["https://microsoft.net/"],
"network.dynamic.uri_path": ["/"],
},
[{"uri": "https://microsoft.net/"}],
),
],
)
def test_extract_iocs_from_text_blob(blob, enforce_min, enforce_max, correct_tags, expected_iocs):
Expand Down

0 comments on commit 69ede8d

Please sign in to comment.