TheHive-Project · 3c7 · Dec 17, 2019 · Oct 5, 2018 · Oct 8, 2018
diff --git a/analyzers/MalwareClustering/MalwareClustering.json b/analyzers/MalwareClustering/MalwareClustering.json
@@ -0,0 +1,54 @@
+{
+    "name": "MalwareClustering_Search",
+    "version": "1.0",
+    "author": "LDO-CERT",
+    "url": "https://github.com/LDO-CERT/Cortex-Analyzers",
+    "license": "AGPL-V3",
+    "description": "Uses ApiVectors to find similarities between malware samples.",
+    "dataTypeList": ["file", "hash"],
+    "baseConfig": "MalwareClustering",
+    "config": {
+        "check_tlp": true,
+        "service": "search",
+        "max_tlp": 3 
+    },
+    "command": "MalwareClustering/malwareclustering.py",
+    "configurationItems": [
+        {
+            "name": "n4j_host",
+            "description": "Neo4j server host",
+            "type": "string",
+            "multi": false,
+            "required": true
+        },
+        {
+            "name": "n4j_port",
+            "description": "Neo4j server port",
+            "type": "number",
+            "multi": false,
+            "required": true
+        },
+        {
+            "name": "n4j_user",
+            "description": "Neo4j server user",
+            "type": "string",
+            "multi": false,
+            "required": true
+        },
+        {
+            "name": "n4j_pwd",
+            "description": "Neo4j server password",
+            "type": "string",
+            "multi": false,
+            "required": true
+        },
+        {
+            "name": "threshold",
+            "description": "ApiScout correlation threshold",
+            "type": "string",
+            "multi": false,
+            "required": true
+        }
+
+    ]
+}
diff --git a/analyzers/MalwareClustering/README.md b/analyzers/MalwareClustering/README.md
@@ -0,0 +1,16 @@
+# Prerequisites: 
+
+## Required:
+	- [neo4j db instance](https://neo4j.com/download/)
+	- Python dependencies: `pip3 install -r requirements.txt`
+
+## Optional:
+	- bulk import known malware samples in db from:
+		- [cloned malpedia repo](https://malpedia.caad.fkie.fraunhofer.de/)
+		- a folder containing malicious samples, each optionally accompanied by a Malpedia-like JSON definition
+
+```
+from malwareclustering_api import Api
+test = Api(host='127.0.0.1', port=7474, user='neo4j', password='password', threshold=40, folder_path='/home/user/malware_samples')
+test.process()
+```
diff --git a/analyzers/MalwareClustering/data/UserDB.TXT b/analyzers/MalwareClustering/data/UserDB.TXT
diff --git a/analyzers/MalwareClustering/data/winapi1024v1.txt b/analyzers/MalwareClustering/data/winapi1024v1.txt
diff --git a/analyzers/MalwareClustering/malwareclustering.py b/analyzers/MalwareClustering/malwareclustering.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+import time
+import hashlib
+
+from malwareclustering_api import Api
+from cortexutils.analyzer import Analyzer
+
+
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+
+class MalwareClusteringAnalyzer(Analyzer):
+
+    def __init__(self):
+        Analyzer.__init__(self)
+        self.service = self.get_param('config.service', None, 'Service parameter is missing')
+        self.n4j_host = self.get_param('config.n4j_host', None, 'Missing Neo4J Host')
+        self.n4j_port = self.get_param('config.n4j_port', None, 'Missing Neo4J Port')
+        self.n4j_user = self.get_param('config.n4j_user', None, 'Missing Neo4J User')
+        self.n4j_password = self.get_param('config.n4j_pwd', None, 'Missing Neo4J Password')
+        self.threshold = self.get_param('config.threshold', None, 'Missing ApiScout Threshold')
+        self.polling_interval = self.get_param('config.polling_interval', 60)
+        self.n4j_api = Api(self.n4j_host,self.n4j_port,self.n4j_user,self.n4j_password,self.threshold)
+
+    def summary(self, raw):
+        taxonomies = []
+        namespace = "MalwareClustering"
+        predicate = "Family"
+        score = -1
+
+        result = { "has_result": True }   
+
+        value = raw.get('info', {}).get('tag', None) if raw.get('info', {}) else None
+
+        if not value:
+            top_family = max([(x['tag'], x['max']) for x in raw.get('families', [])], key = lambda x: x[1]) if raw.get('families', None) else None
+
+            if top_family:
+                (value, score) = top_family
+                if score > 70:
+                    level = "malicious"
+                elif score > 40:
+                    level = "suspicious"
+            else:
+                value = "No info"
+                level = "info"
+        else:
+            level = "malicious"        
+
+        taxonomies.append(self.build_taxonomy(level, namespace, predicate, value))
+        return {"taxonomies": taxonomies}
+
+    def run(self):
+        if self.service == 'search':
+            if self.data_type == 'file':
+                filename = self.get_param('filename', 'noname.ext')
+                filepath = self.get_param('file', None, 'File is missing')
+                return self.report(self.n4j_api.process_file(filepath, filename))
+            elif self.data_type == 'hash':
+                data = self.get_param('data', None, 'Data is missing')
+                return self.report(self.n4j_api.search_hash(data))
+            else:
+                self.error('Invalid data type')
+        else:
+            self.error('Invalid service')
+
+
+if __name__ == '__main__':
+    MalwareClusteringAnalyzer().run()