Skip to content

Commit

Permalink
Merge pull request #351 from LDO-CERT/MalwareClustering
Browse files Browse the repository at this point in the history
Malware clustering
  • Loading branch information
3c7 authored Dec 17, 2019
2 parents 4400e2a + 5fc8cf0 commit 7693831
Show file tree
Hide file tree
Showing 10 changed files with 16,151 additions and 0 deletions.
54 changes: 54 additions & 0 deletions analyzers/MalwareClustering/MalwareClustering.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"name": "MalwareClustering_Search",
"version": "1.0",
"author": "LDO-CERT",
"url": "https://github.com/LDO-CERT/Cortex-Analyzers",
"license": "AGPL-V3",
"description": "Uses ApiVectors to find similarities between malware samples.",
"dataTypeList": ["file", "hash"],
"baseConfig": "MalwareClustering",
"config": {
"check_tlp": true,
"service": "search",
"max_tlp": 3
},
"command": "MalwareClustering/malwareclustering.py",
"configurationItems": [
{
"name": "n4j_host",
"description": "Neo4j server host",
"type": "string",
"multi": false,
"required": true
},
{
"name": "n4j_port",
"description": "Neo4j server port",
"type": "number",
"multi": false,
"required": true
},
{
"name": "n4j_user",
"description": "Neo4j server user",
"type": "string",
"multi": false,
"required": true
},
{
"name": "n4j_pwd",
"description": "Neo4j server password",
"type": "string",
"multi": false,
"required": true
},
{
"name": "threshold",
"description": "ApiScout correlation threshold",
"type": "string",
"multi": false,
"required": true
}

]
}
16 changes: 16 additions & 0 deletions analyzers/MalwareClustering/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Prerequisites:

## Required:
- [neo4j db instance](https://neo4j.com/download/)
- pip3 install -r requirements.txt

## Optional:
- bulk import known malware samples into the db from:
- [cloned malpedia repo](https://malpedia.caad.fkie.fraunhofer.de/)
- a folder containing malicious samples, optionally with Malpedia-style JSON definitions

```
from malwareclustering_api import Api
test = Api(host='127.0.0.1', port=7474, user='neo4j', password='password', threshold=40, folder_path='/home/user/malware_samples')
test.process()
```
14,083 changes: 14,083 additions & 0 deletions analyzers/MalwareClustering/data/UserDB.TXT

Large diffs are not rendered by default.

1,024 changes: 1,024 additions & 0 deletions analyzers/MalwareClustering/data/winapi1024v1.txt

Large diffs are not rendered by default.

73 changes: 73 additions & 0 deletions analyzers/MalwareClustering/malwareclustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
# encoding: utf-8
import time
import hashlib

from malwareclustering_api import Api
from cortexutils.analyzer import Analyzer


try:
from StringIO import StringIO
except ImportError:
from io import StringIO


class MalwareClusteringAnalyzer(Analyzer):
    """Cortex analyzer that queries the MalwareClustering ``Api`` for a sample.

    Supports the ``search`` service on ``file`` and ``hash`` observables and
    condenses the result into a single ``MalwareClustering:Family`` taxonomy.
    """

    def __init__(self):
        """Load the analyzer configuration and create the ``Api`` client.

        Each required setting is fetched with ``get_param`` together with the
        message to use when it is missing; ``config.polling_interval`` falls
        back to 60 when not configured.
        """
        Analyzer.__init__(self)
        self.service = self.get_param('config.service', None, 'Service parameter is missing')
        self.n4j_host = self.get_param('config.n4j_host', None, 'Missing Neo4J Host')
        self.n4j_port = self.get_param('config.n4j_port', None, 'Missing Neo4J Port')
        self.n4j_user = self.get_param('config.n4j_user', None, 'Missing Neo4J User')
        self.n4j_password = self.get_param('config.n4j_pwd', None, 'Missing Neo4J Password')
        self.threshold = self.get_param('config.threshold', None, 'Missing ApiScout Threshold')
        self.polling_interval = self.get_param('config.polling_interval', 60)
        self.n4j_api = Api(
            self.n4j_host,
            self.n4j_port,
            self.n4j_user,
            self.n4j_password,
            self.threshold,
        )

    def summary(self, raw):
        """Build the taxonomy summary for a full report.

        Args:
            raw: Report dictionary. ``raw['info']['tag']`` and
                ``raw['families']`` (a list of dicts with ``'tag'`` and
                ``'max'`` keys) are the only fields read.

        Returns:
            ``{"taxonomies": [taxonomy]}`` with exactly one taxonomy:

            * the ``info`` tag, level ``malicious``, when a tag is present;
            * otherwise the family with the highest ``max`` value, level
              ``malicious`` above 70 or ``suspicious`` above 40;
            * otherwise ``"No info"`` with level ``info``.
        """
        namespace = "MalwareClustering"
        predicate = "Family"

        # Start from the neutral verdict so that every path below reaches
        # build_taxonomy with both names bound, including "no families" and
        # "best match at or below 40".
        value = "No info"
        level = "info"

        info = raw.get('info') or {}
        known_tag = info.get('tag')
        families = raw.get('families')

        if known_tag:
            value = known_tag
            level = "malicious"
        elif families:
            # max() keeps the first entry on ties.
            best = max(families, key=lambda family: family['max'])
            score = best['max']
            if score > 70:
                value, level = best['tag'], "malicious"
            elif score > 40:
                value, level = best['tag'], "suspicious"

        taxonomy = self.build_taxonomy(level, namespace, predicate, value)
        return {"taxonomies": [taxonomy]}

    def run(self):
        """Dispatch on the configured service and observable data type.

        ``file`` observables go to ``Api.process_file`` and ``hash``
        observables to ``Api.search_hash``; the result is handed to
        ``report``. Any other service or data type is passed to ``error``.
        """
        if self.service == 'search':
            if self.data_type == 'file':
                filename = self.get_param('filename', 'noname.ext')
                filepath = self.get_param('file', None, 'File is missing')
                return self.report(self.n4j_api.process_file(filepath, filename))
            elif self.data_type == 'hash':
                data = self.get_param('data', None, 'Data is missing')
                return self.report(self.n4j_api.search_hash(data))
            else:
                self.error('Invalid data type')
        else:
            self.error('Invalid service')


if __name__ == '__main__':
    # Script entry point: build the analyzer, then run the configured service.
    analyzer = MalwareClusteringAnalyzer()
    analyzer.run()
Loading

0 comments on commit 7693831

Please sign in to comment.