Skip to content

Commit 0bc96af

Browse files
authored
Merge pull request #261 from CybercentreCanada/feature/whitelist
Feature/whitelist (dev)
2 parents f8e04ee + 4ab9441 commit 0bc96af

15 files changed

+1306
-1070
lines changed

assemblyline/cachestore/__init__.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def __init__(self, component, config=None, datastore=None):
2121
config = forge.get_config()
2222

2323
self.component = component
24-
self.datastore = datastore or forge.get_datastore()
24+
self.datastore = datastore or forge.get_datastore(config=config)
2525
self.filestore = FileStore(*config.filestore.cache)
2626

2727
def __enter__(self):
@@ -30,15 +30,15 @@ def __enter__(self):
3030
def __exit__(self, ex_type, exc_val, exc_tb):
3131
self.filestore.close()
3232

33-
def save(self, cache_key, data, ttl=DEFAULT_CACHE_LEN):
33+
def save(self, cache_key, data, ttl=DEFAULT_CACHE_LEN, force=False):
3434
if not COMPONENT_VALIDATOR.match(cache_key):
3535
raise ValueError("Invalid cache_key for cache item. "
3636
"(Only letters, numbers, underscores and dots allowed)")
3737

3838
new_key = f"{self.component}_{cache_key}" if self.component else cache_key
3939

4040
self.datastore.cached_file.save(new_key, {'expiry_ts': now_as_iso(ttl), 'component': self.component})
41-
self.filestore.put(new_key, data)
41+
self.filestore.put(new_key, data, force=force)
4242

4343
def get(self, cache_key):
4444
new_key = f"{self.component}_{cache_key}" if self.component else cache_key

assemblyline/common/forge.py

+22-10
Original file line numberDiff line numberDiff line change
@@ -138,31 +138,43 @@ def get_service_queue(service: str, redis=None):
138138
return PriorityQueue(service_queue_name(service), redis)
139139

140140

141-
def get_tag_whitelister(log=None, yml_config=None):
142-
from assemblyline.common.tagging import TagWhitelister, InvalidWhitelist
141+
def get_tag_safelist_data(yml_config=None):
143142

144143
if yml_config is None:
145-
yml_config = "/etc/assemblyline/tag_whitelist.yml"
144+
yml_config = "/etc/assemblyline/tag_safelist.yml"
146145

147-
tag_whitelist_data = {}
148-
default_file = os.path.join(os.path.dirname(__file__), "tag_whitelist.yml")
146+
tag_safelist_data = {}
147+
default_file = os.path.join(os.path.dirname(__file__), "tag_safelist.yml")
149148
if os.path.exists(default_file):
150149
with open(default_file) as default_fh:
151150
default_yml_data = yaml.safe_load(default_fh.read())
152151
if default_yml_data:
153-
tag_whitelist_data.update(default_yml_data)
152+
tag_safelist_data.update(default_yml_data)
154153

155154
# Load modifiers from the yaml config
156155
if os.path.exists(yml_config):
157156
with open(yml_config) as yml_fh:
158157
yml_data = yaml.safe_load(yml_fh.read())
159158
if yml_data:
160-
tag_whitelist_data = recursive_update(tag_whitelist_data, yml_data)
159+
tag_safelist_data = recursive_update(tag_safelist_data, yml_data)
161160

162-
if not tag_whitelist_data:
163-
raise InvalidWhitelist('Could not find any tag_whitelist file to load.')
161+
return tag_safelist_data
164162

165-
return TagWhitelister(tag_whitelist_data, log=log)
163+
164+
def get_tag_safelister(log=None, yml_config=None, config=None, datastore=None):
165+
from assemblyline.common.tagging import TagSafelister, InvalidSafelist
166+
167+
with get_cachestore('system', config=config, datastore=datastore) as cache:
168+
tag_safelist_yml = cache.get('tag_safelist_yml')
169+
if tag_safelist_yml:
170+
tag_safelist_data = yaml.safe_load(tag_safelist_yml)
171+
else:
172+
tag_safelist_data = get_tag_safelist_data(yml_config=yml_config)
173+
174+
if not tag_safelist_data:
175+
raise InvalidSafelist('Could not find any tag_safelist file to load.')
176+
177+
return TagSafelister(tag_safelist_data, log=log)
166178

167179

168180
class CachedObject:

assemblyline/common/tag_whitelist.yml assemblyline/common/tag_safelist.yml

+11-11
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
1-
# Default tag_whitelist.yml file
1+
# Default tag_safelist.yml file
22
#
3-
# The following tags are whitelisted:
3+
# The following tags are safelisted:
44
# - Domains pointing to localhost
55
# - Domains commonly found in XML files, certificates and during dynamic Analysis runs
66
# - IPs in the private network IP space
77
# - URIs pointing to IPs in the private network IP space
88
# - URIs commonly found in XML files, certificates and during dynamic Analysis runs
99
#
10-
# Note: - You can override the default tag_whitelist.yml by putting an
11-
# updated version in /etc/assemblyline/tag_whitelist.yml.
10+
# Note: - You can override the default tag_safelist.yml by putting an
11+
# updated version in /etc/assemblyline/tag_safelist.yml.
1212
# - If you want to add values to one of the following tag types,
1313
# you have to copy the default values to the new file.
1414
# - You can nullify a value by putting an empty object or an empty list
1515
# in your new file
1616

1717
# Match section contains tag types and for each tag type
18-
# a list of values that should be whitelisted using a direct
18+
# a list of values that should be safelisted using a direct
1919
# string comparison.
2020
match:
2121
# Direct match to dynamic domains
@@ -64,18 +64,18 @@ match:
6464
- purl.org
6565

6666
# Regex section contains tag types and for each tag type
67-
# a list of regular expression to be run to whitelist
67+
# a list of regular expression to be run to safelist
6868
# the associated tags.
6969
regex:
70-
# Regular expression to whitelist dynamic IPs (Private IPs)
70+
# Regular expression to safelist dynamic IPs (Private IPs)
7171
# note: Since IPs have already been validated, the regular expression is simpler
7272
network.dynamic.ip:
7373
- (?:127\.|10\.|192\.168|172\.1[6-9]\.|172\.2[0-9]\.|172\.3[01]\.).*
74-
# Regular expression to whitelist static IPs (Private IPs)
74+
# Regular expression to safelist static IPs (Private IPs)
7575
# note: Since IPs have already been validated, the regular expression is simpler
7676
network.static.ip:
7777
- (?:127\.|10\.|192\.168|172\.1[6-9]\.|172\.2[0-9]\.|172\.3[01]\.).*
78-
# Regular expression to whitelist dynamic URIs
78+
# Regular expression to safelist dynamic URIs
7979
network.dynamic.uri:
8080
- (?:ftp|http)s?://localhost(?:$|/.*)
8181
- (?:ftp|http)s?://(?:(?:(?:10|127)(?:\.(?:[2](?:[0-5][0-5]|[01234][6-9])|[1][0-9][0-9]|[1-9][0-9]|[0-9])){3})|(?:172\.(?:1[6-9]|2[0-9]|3[0-1])(?:\.(?:2[0-4][0-9]|25[0-5]|[1][0-9][0-9]|[1-9][0-9]|[0-9])){2}|(?:192\.168(?:\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])){2})))(?:$|/.*)
@@ -110,7 +110,7 @@ regex:
110110
- https?://ns\.adobe\.com/xap/1\.0/sType/ResourceEvent#
111111
- https?://purl\.org/dc/elements/1\.1/
112112
- https?://www\.w3\.org/1999/02/22-rdf-syntax-ns#
113-
# Regular expression to whitelist static URIs
113+
# Regular expression to safelist static URIs
114114
network.static.uri:
115115
- (?:ftp|http)s?://localhost(?:$|/.*)
116116
- (?:ftp|http)s?://(?:(?:(?:10|127)(?:\.(?:[2](?:[0-5][0-5]|[01234][6-9])|[1][0-9][0-9]|[1-9][0-9]|[0-9])){3})|(?:172\.(?:1[6-9]|2[0-9]|3[0-1])(?:\.(?:2[0-4][0-9]|25[0-5]|[1][0-9][0-9]|[1-9][0-9]|[0-9])){2}|(?:192\.168(?:\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])){2})))(?:$|/.*)
@@ -144,4 +144,4 @@ regex:
144144
- https?://ns\.adobe\.com/xap/1\.0/mm/
145145
- https?://ns\.adobe\.com/xap/1\.0/sType/ResourceEvent#
146146
- https?://purl\.org/dc/elements/1\.1/
147-
- https?://www\.w3\.org/1999/02/22-rdf-syntax-ns#
147+
- https?://www\.w3\.org/1999/02/22-rdf-syntax-ns#

assemblyline/common/tagging.py

+51-16
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import re
22

3-
from typing import List, Dict
3+
from typing import List, Dict, Set
44

5-
from assemblyline.common.dict_utils import flatten
5+
from assemblyline.common.forge import CachedObject, get_datastore
66
from assemblyline.odm.models.tagging import Tagging
77

88

@@ -16,22 +16,33 @@ def tag_list_to_dict(tag_list: List[Dict]) -> Dict:
1616
return tag_dict
1717

1818

19-
def tag_dict_to_list(tag_dict: Dict) -> List[Dict]:
19+
def tag_dict_to_list(tag_dict: Dict, safelisted: bool = False) -> List[Dict]:
2020
return [
21-
{'type': k, 'value': t, 'short_type': k.rsplit(".", 1)[-1]}
22-
for k, v in flatten(tag_dict).items()
21+
{'safelisted': safelisted, 'type': k, 'value': t, 'short_type': k.rsplit(".", 1)[-1]}
22+
for k, v in tag_dict.items()
2323
if v is not None
2424
for t in v
2525
]
2626

2727

28-
class InvalidWhitelist(Exception):
28+
def get_safelist_key(t_type: str, t_value: str) -> str:
29+
return f"{t_type}__{t_value}"
30+
31+
32+
def get_safelist(ds) -> Set:
33+
return {get_safelist_key(sl['tag']['type'], sl['tag']['value']): True
34+
for sl in ds.safelist.stream_search("type:tag AND enabled:true", as_obj=False)}
35+
36+
37+
class InvalidSafelist(Exception):
2938
pass
3039

3140

32-
class TagWhitelister(object):
41+
class TagSafelister(object):
3342
def __init__(self, data, log=None):
3443
valid_tags = set(Tagging.flat_fields().keys())
44+
self.datastore = get_datastore()
45+
self.safelist = CachedObject(get_safelist, kwargs={'ds': self.datastore}, refresh=300)
3546

3647
self.match = data.get('match', {})
3748
self.regex = data.get('regex', {})
@@ -40,38 +51,62 @@ def __init__(self, data, log=None):
4051
# Validate matches and regex
4152
for section, item in {'match': self.match, 'regex': self.regex}.items():
4253
if not isinstance(item, dict):
43-
raise InvalidWhitelist(f"Section {section} should be of type: DICT")
54+
raise InvalidSafelist(f"Section {section} should be of type: DICT")
4455

4556
for k, v in item.items():
4657
if not isinstance(v, list):
47-
raise InvalidWhitelist(f"Values in the {section} section should all be of type: LIST")
58+
raise InvalidSafelist(f"Values in the {section} section should all be of type: LIST")
4859

4960
if k not in valid_tags:
50-
raise InvalidWhitelist(f"Key ({k}) in the {section} section is not a valid tag.")
61+
raise InvalidSafelist(f"Key ({k}) in the {section} section is not a valid tag.")
5162

5263
if section == 'regex':
5364
self.regex[k] = [re.compile(x) for x in v]
5465

55-
def is_whitelisted(self, t_type, t_value):
66+
def is_safelisted(self, t_type, t_value):
67+
if self.safelist.get(get_safelist_key(t_type, t_value), False):
68+
if self.log:
69+
self.log.info(f"Tag '{t_type}' with value '{t_value}' was safelisted.")
70+
return True
71+
5672
for match in self.match.get(t_type, []):
5773
if t_value == match:
5874
if self.log:
59-
self.log.info(f"Tag '{t_type}' with value '{t_value}' was whitelisted by match rule.")
75+
self.log.info(f"Tag '{t_type}' with value '{t_value}' was safelisted by match rule.")
6076
return True
6177

6278
for regex in self.regex.get(t_type, []):
6379
if regex.match(t_value):
6480
if self.log:
6581
self.log.info(f"Tag '{t_type}' with value '{t_value}' "
66-
f"was whitelisted by regex '{regex.pattern}'.")
82+
f"was safelisted by regex '{regex.pattern}'.")
6783
return True
6884

6985
return False
7086

71-
def whitelist_many(self, t_type, t_values):
87+
def safelist_many(self, t_type, t_values):
7288
if not isinstance(t_values, list):
7389
t_values = [t_values]
74-
return [x for x in t_values if not self.is_whitelisted(t_type, x)]
90+
91+
tags = []
92+
safelisted_tags = []
93+
for x in t_values:
94+
if self.is_safelisted(t_type, x):
95+
safelisted_tags.append(x)
96+
else:
97+
tags.append(x)
98+
99+
return tags, safelisted_tags
75100

76101
def get_validated_tag_map(self, tag_map):
77-
return {k: self.whitelist_many(k, v) for k, v in tag_map.items() if v is not None}
102+
tags = {}
103+
safelisted_tags = {}
104+
for k, v in tag_map.items():
105+
if v is not None and v != []:
106+
c_tags, c_safelisted_tags = self.safelist_many(k, v)
107+
if c_tags:
108+
tags[k] = c_tags
109+
if c_safelisted_tags:
110+
safelisted_tags[k] = c_safelisted_tags
111+
112+
return tags, safelisted_tags

assemblyline/datastore/helper.py

+41-3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from assemblyline.odm.models.user import User
3333
from assemblyline.odm.models.user_favorites import UserFavorites
3434
from assemblyline.odm.models.user_settings import UserSettings
35+
from assemblyline.odm.models.safelist import Safelist
3536
from assemblyline.odm.models.workflow import Workflow
3637
from assemblyline.remote.datatypes.lock import Lock
3738

@@ -60,6 +61,7 @@ def __init__(self, datastore_object):
6061
self.ds.register('user_avatar')
6162
self.ds.register('user_favorites', UserFavorites)
6263
self.ds.register('user_settings', UserSettings)
64+
self.ds.register('safelist', Safelist)
6365
self.ds.register('workflow', Workflow)
6466

6567
def __enter__(self):
@@ -160,6 +162,10 @@ def user_settings(self) -> Collection:
160162
def vm(self) -> Collection:
161163
return self.ds.vm
162164

165+
@property
166+
def safelist(self) -> Collection:
167+
return self.ds.safelist
168+
163169
@property
164170
def workflow(self) -> Collection:
165171
return self.ds.workflow
@@ -735,6 +741,7 @@ def get_summary_from_keys(self, keys, cl_engine=forge.get_classification(), user
735741
"attack_matrix": [],
736742
"heuristics": {
737743
"info": [],
744+
"safe": [],
738745
"suspicious": [],
739746
"malicious": []
740747
},
@@ -785,7 +792,9 @@ def get_summary_from_keys(self, keys, cl_engine=forge.get_classification(), user
785792

786793
if section.get('heuristic', False):
787794
# Get the heuristics data
788-
if section['heuristic']['score'] < 100:
795+
if section['heuristic']['score'] < 0:
796+
h_type = "safe"
797+
elif section['heuristic']['score'] < 100:
789798
h_type = "info"
790799
elif section['heuristic']['score'] < 1000:
791800
h_type = "suspicious"
@@ -828,7 +837,25 @@ def get_summary_from_keys(self, keys, cl_engine=forge.get_classification(), user
828837
'h_type': h_type,
829838
'short_type': tag_type.rsplit(".", 1)[-1],
830839
'value': tag,
831-
'key': key
840+
'key': key,
841+
'safelisted': False
842+
})
843+
done_map['tags'].add(cache_key)
844+
845+
# Get safelisted tag data
846+
for tag_type, tags in section.get('safelisted_tags', {}).items():
847+
if tags is not None:
848+
for tag in tags:
849+
cache_key = f"{tag_type}_{tag}_{key}"
850+
851+
if cache_key not in done_map['tags']:
852+
out['tags'].append({
853+
'type': tag_type,
854+
'h_type': h_type,
855+
'short_type': tag_type.rsplit(".", 1)[-1],
856+
'value': tag,
857+
'key': key,
858+
'safelisted': True
832859
})
833860
done_map['tags'].add(cache_key)
834861

@@ -851,7 +878,18 @@ def get_tag_list_from_keys(self, keys):
851878
'type': tag_type,
852879
'short_type': tag_type.rsplit(".", 1)[-1],
853880
'value': tag,
854-
'key': key
881+
'key': key,
882+
'safelisted': False
883+
})
884+
for tag_type, tags in section.get('safelisted_tags', {}).items():
885+
if tags is not None:
886+
for tag in tags:
887+
out.append({
888+
'type': tag_type,
889+
'short_type': tag_type.rsplit(".", 1)[-1],
890+
'value': tag,
891+
'key': key,
892+
'safelisted': True
855893
})
856894

857895
return out

0 commit comments

Comments
 (0)