Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Malware clustering #351

Merged
merged 2 commits into from
Dec 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions analyzers/MalwareClustering/MalwareClustering.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"name": "MalwareClustering_Search",
"version": "1.0",
"author": "LDO-CERT",
"url": "https://github.com/LDO-CERT/Cortex-Analyzers",
"license": "AGPL-V3",
"description": "Uses ApiVectors to find similarities between malware samples.",
"dataTypeList": ["file", "hash"],
"baseConfig": "MalwareClustering",
"config": {
"check_tlp": true,
"service": "search",
"max_tlp": 3
},
"command": "MalwareClustering/malwareclustering.py",
"configurationItems": [
{
"name": "n4j_host",
"description": "Neo4j server host",
"type": "string",
"multi": false,
"required": true
},
{
"name": "n4j_port",
"description": "Neo4j server port",
"type": "number",
"multi": false,
"required": true
},
{
"name": "n4j_user",
"description": "Neo4j server user",
"type": "string",
"multi": false,
"required": true
},
{
"name": "n4j_pwd",
"description": "Neo4j server password",
"type": "string",
"multi": false,
"required": true
},
{
"name": "threshold",
"description": "ApiScout correlation threshold",
"type": "string",
"multi": false,
"required": true
}

]
}
16 changes: 16 additions & 0 deletions analyzers/MalwareClustering/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Prerequisites:

## Required:
- [neo4j db instance](https://neo4j.com/download/)
- Python dependencies: `pip3 install -r requirements.txt`

## Optional:
- bulk import known malware samples into the db from either:
- a [cloned malpedia repo](https://malpedia.caad.fkie.fraunhofer.de/)
- a folder containing malicious samples, with optional Malpedia-like JSON definitions

```
from malwareclustering_api import Api
test = Api(host='127.0.0.1', port=7474, user='neo4j', password='password', threshold=40, folder_path='/home/user/malware_samples')
test.process()
```
14,083 changes: 14,083 additions & 0 deletions analyzers/MalwareClustering/data/UserDB.TXT

Large diffs are not rendered by default.

1,024 changes: 1,024 additions & 0 deletions analyzers/MalwareClustering/data/winapi1024v1.txt

Large diffs are not rendered by default.

73 changes: 73 additions & 0 deletions analyzers/MalwareClustering/malwareclustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
# encoding: utf-8
import time
import hashlib

from malwareclustering_api import Api
from cortexutils.analyzer import Analyzer


try:
from StringIO import StringIO
except ImportError:
from io import StringIO


class MalwareClusteringAnalyzer(Analyzer):
    """Analyzer that submits a file or a hash to the malware clustering API.

    Connection settings and the correlation threshold are read from the
    ``config.*`` parameters and handed to ``Api``; ``run`` dispatches on the
    observable data type and ``summary`` turns a report into one taxonomy.
    """

    # Family scores strictly above these bounds raise the taxonomy level.
    MALICIOUS_SCORE = 70
    SUSPICIOUS_SCORE = 40

    def __init__(self):
        """Read the analyzer configuration and build the API client."""
        Analyzer.__init__(self)
        self.service = self.get_param('config.service', None, 'Service parameter is missing')
        self.n4j_host = self.get_param('config.n4j_host', None, 'Missing Neo4J Host')
        self.n4j_port = self.get_param('config.n4j_port', None, 'Missing Neo4J Port')
        self.n4j_user = self.get_param('config.n4j_user', None, 'Missing Neo4J User')
        self.n4j_password = self.get_param('config.n4j_pwd', None, 'Missing Neo4J Password')
        self.threshold = self.get_param('config.threshold', None, 'Missing ApiScout Threshold')
        # Not used inside this class; kept because it is a public attribute.
        self.polling_interval = self.get_param('config.polling_interval', 60)
        self.n4j_api = Api(
            self.n4j_host,
            self.n4j_port,
            self.n4j_user,
            self.n4j_password,
            self.threshold,
        )

    def summary(self, raw):
        """Build the taxonomy summary for a report.

        ``raw['info']['tag']``, when present and non-empty, is reported as a
        "malicious" family. Otherwise the entry of ``raw['families']`` with
        the highest ``'max'`` value is considered: above 70 it is reported as
        "malicious", above 40 as "suspicious". In every other case (no
        families, or a best score of 40 or less) the result is the "info"
        level with the value "No info".

        Returns a dict with a single ``"taxonomies"`` list.
        """
        namespace = "MalwareClustering"
        predicate = "Family"

        value = (raw.get('info') or {}).get('tag')

        if value:
            level = "malicious"
        else:
            # Start from the neutral verdict so that `level` is always bound,
            # including when there are no families or the best score is low.
            value, level = "No info", "info"
            families = raw.get('families') or []
            if families:
                best = max(families, key=lambda family: family['max'])
                score = best['max']
                if score > self.MALICIOUS_SCORE:
                    value, level = best['tag'], "malicious"
                elif score > self.SUSPICIOUS_SCORE:
                    value, level = best['tag'], "suspicious"

        taxonomies = [self.build_taxonomy(level, namespace, predicate, value)]
        return {"taxonomies": taxonomies}

    def run(self):
        """Run the configured service against the current observable.

        Only the 'search' service is handled: a 'file' observable is passed
        to ``process_file`` and a 'hash' observable to ``search_hash``; the
        outcome is forwarded to ``report``. Any other service or data type is
        signalled through ``error``.
        """
        if self.service != 'search':
            self.error('Invalid service')
            return None

        if self.data_type == 'file':
            filename = self.get_param('filename', 'noname.ext')
            filepath = self.get_param('file', None, 'File is missing')
            return self.report(self.n4j_api.process_file(filepath, filename))

        if self.data_type == 'hash':
            data = self.get_param('data', None, 'Data is missing')
            return self.report(self.n4j_api.search_hash(data))

        self.error('Invalid data type')
        return None


# Script entry point: build the analyzer and execute it once.
if __name__ == '__main__':
    analyzer = MalwareClusteringAnalyzer()
    analyzer.run()
Loading