From 79dc4d1f46660b2c5231d05b0c8e3a92c3c75f2a Mon Sep 17 00:00:00 2001 From: Manabu Niseki Date: Thu, 12 Apr 2018 16:59:08 +0900 Subject: [PATCH 1/2] Add URLhaus analyzer --- analyzers/URLhaus/URLhaus.json | 10 +++++ analyzers/URLhaus/URLhaus.py | 39 +++++++++++++++++ analyzers/URLhaus/URLhaus_analyzer.py | 50 +++++++++++++++++++++ analyzers/URLhaus/requirements.txt | 1 + thehive-templates/URLhaus_0_1_0/long.html | 51 ++++++++++++++++++++++ thehive-templates/URLhaus_0_1_0/short.html | 3 ++ 6 files changed, 154 insertions(+) create mode 100644 analyzers/URLhaus/URLhaus.json create mode 100644 analyzers/URLhaus/URLhaus.py create mode 100755 analyzers/URLhaus/URLhaus_analyzer.py create mode 100644 analyzers/URLhaus/requirements.txt create mode 100644 thehive-templates/URLhaus_0_1_0/long.html create mode 100644 thehive-templates/URLhaus_0_1_0/short.html diff --git a/analyzers/URLhaus/URLhaus.json b/analyzers/URLhaus/URLhaus.json new file mode 100644 index 000000000..d46d15058 --- /dev/null +++ b/analyzers/URLhaus/URLhaus.json @@ -0,0 +1,10 @@ +{ + "name": "URLhaus", + "author": "ninoseki", + "license": "MIT", + "url": "https://github.com/ninoseki/cortex_URLhaus_analyzer", + "version": "0.1.0", + "description": "Search domains, URLs or hashes on URLhaus", + "dataTypeList": ["domain", "url", "hash"], + "command": "URLhaus/URLhaus_analyzer.py" +} diff --git a/analyzers/URLhaus/URLhaus.py b/analyzers/URLhaus/URLhaus.py new file mode 100644 index 000000000..ccd5531ae --- /dev/null +++ b/analyzers/URLhaus/URLhaus.py @@ -0,0 +1,39 @@ +from requests_html import HTMLSession +import urllib + + +class URLhaus: + def __init__(self, query): + self.URL = "https://urlhaus.abuse.ch/browse.php" + self.query = query + + def search(self): + res = self.fetch() + return self.parse(res) + + def fetch(self): + session = HTMLSession() + return session.get(self.target_url()) + + def parse(self, res): + results = [] + table = res.html.find("table.table", first=True) + rows = 
table.find("tr")[1:] + for row in rows: + cols = row.find("td") + results.append({ + "dateadded": cols[0].text, + "malware_url": cols[1].text, + "link": cols[1].find("a", first=True).attrs.get("href"), + "status": cols[2].text, + "tags": cols[3].text.split(), + "gsb": cols[4].text, + "reporter": cols[5].text + }) + return results + + def target_url(self): + return "{}?{}".format( + self.URL, + urllib.parse.urlencode({"search": self.query}) + ) diff --git a/analyzers/URLhaus/URLhaus_analyzer.py b/analyzers/URLhaus/URLhaus_analyzer.py new file mode 100755 index 000000000..b60598334 --- /dev/null +++ b/analyzers/URLhaus/URLhaus_analyzer.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +from cortexutils.analyzer import Analyzer +from URLhaus import URLhaus + + +class URLhausAnalyzer(Analyzer): + def __init__(self): + Analyzer.__init__(self) + + def search(self, indicator): + """ + Searches for a website using the indicator + :param indicator: domain, url, hash + :type indicator: str + :return: dict + """ + return URLhaus(indicator).search() + + def run(self): + targets = ["domain", "url", "hash"] + if self.get_data() is not None and self.data_type in targets: + self.report({ + 'results': self.search(self.get_data()) + }) + + def summary(self, raw): + taxonomies = [] + level = "info" + namespace = "URLhaus" + predicate = "Search" + value = "\"0 result\"" + + results = raw["results"] + if len(results) >= 1: + level = "malicious" + + if len(results) <= 1: + value = "\"{} result\"".format(len(results)) + else: + value = "\"{} results\"".format(len(results)) + + taxonomies.append( + self.build_taxonomy(level, namespace, predicate, value) + ) + + return {"taxonomies": taxonomies} + + +if __name__ == '__main__': + URLhausAnalyzer().run() diff --git a/analyzers/URLhaus/requirements.txt b/analyzers/URLhaus/requirements.txt new file mode 100644 index 000000000..a8fdb314a --- /dev/null +++ b/analyzers/URLhaus/requirements.txt @@ -0,0 +1 @@ +requests-html diff --git 
a/thehive-templates/URLhaus_0_1_0/long.html b/thehive-templates/URLhaus_0_1_0/long.html new file mode 100644 index 000000000..d8fa06a14 --- /dev/null +++ b/thehive-templates/URLhaus_0_1_0/long.html @@ -0,0 +1,51 @@ +
+
+
 URLhaus search results for
 {{artifact.data}}
+
+

+ No result found. +

+ + + + + + + + + + + + + + + + + + + +
Dateadded (UTC)Malware URLStatusTagsGSBReporter
{{r.dateadded}} + + {{r.malware_url}} + + {{r.status}} + {{tag}} + {{r.gsb}}{{r.reporter}}
+
+
+ + +
+
+ {{artifact.data | fang}} +
+
+
+
+
 URLhaus:
+
{{content.errorMessage}}
+
+
+
diff --git a/thehive-templates/URLhaus_0_1_0/short.html b/thehive-templates/URLhaus_0_1_0/short.html new file mode 100644 index 000000000..57f9d29cf --- /dev/null +++ b/thehive-templates/URLhaus_0_1_0/short.html @@ -0,0 +1,3 @@ + + {{t.namespace}}:{{t.predicate}}={{t.value}} + From 7832cdba3358cec1dc74762d6423289927785a83 Mon Sep 17 00:00:00 2001 From: ninoseki Date: Thu, 12 Apr 2018 21:09:47 +0900 Subject: [PATCH 2/2] Add caching --- analyzers/URLhaus/URLhaus.json | 19 ++++++++++- analyzers/URLhaus/URLhaus.py | 53 ++++++++++++++++++++++-------- analyzers/URLhaus/requirements.txt | 3 ++ 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/analyzers/URLhaus/URLhaus.json b/analyzers/URLhaus/URLhaus.json index d46d15058..cd468e37b 100644 --- a/analyzers/URLhaus/URLhaus.json +++ b/analyzers/URLhaus/URLhaus.json @@ -6,5 +6,22 @@ "version": "0.1.0", "description": "Search domains, URLs or hashes on URLhaus", "dataTypeList": ["domain", "url", "hash"], - "command": "URLhaus/URLhaus_analyzer.py" + "command": "URLhaus/URLhaus_analyzer.py", + "configurationItems": [ + { + "name": "cache.duration", + "description": "Define the cache duration", + "type": "number", + "multi": false, + "required": true, + "defaultValue": 3600 + }, + { + "name": "cache.root", + "description": "Define the path to the stored data", + "type": "string", + "multi": false, + "required": false + } + ] } diff --git a/analyzers/URLhaus/URLhaus.py b/analyzers/URLhaus/URLhaus.py index ccd5531ae..938eb7106 100644 --- a/analyzers/URLhaus/URLhaus.py +++ b/analyzers/URLhaus/URLhaus.py @@ -1,23 +1,54 @@ -from requests_html import HTMLSession -import urllib +from diskcache import Cache +from requests_html import HTML +import requests class URLhaus: - def __init__(self, query): + """Simple client to query URLhaus by abuse.ch. + :param query: domain, url or hash. + :param cache_duration: Duration before refreshing the cache (in seconds). + Ignored if `cache_duration` is 0. 
+ :param cache_root: Path where to store the cached file. + :type query: string + :type cache_duration: int + :type cache_root: str + """ + + def __init__(self, + query, + cache_duration=3600, + cache_root="/tmp/cortex/URLhaus"): self.URL = "https://urlhaus.abuse.ch/browse.php" self.query = query + self.cache = None + if cache_duration > 0: + self.cache = Cache(cache_root) + self.cache_duration = cache_duration + + def _get_raw_data(self): + try: + return self.cache[self.query.encode('utf-8')] + except(AttributeError, TypeError): + return self.fetch() + except KeyError: + self.cache.set( + self.query.encode('utf-8'), + self.fetch(), + expire=self.cache_duration) + return self.cache[self.query.encode('utf-8')] def search(self): - res = self.fetch() + res = self._get_raw_data() return self.parse(res) def fetch(self): - session = HTMLSession() - return session.get(self.target_url()) + payload = {"search": self.query} + return requests.get(self.URL, params=payload).text - def parse(self, res): + def parse(self, doc): results = [] - table = res.html.find("table.table", first=True) + html = HTML(html=doc) + table = html.find("table.table", first=True) rows = table.find("tr")[1:] for row in rows: cols = row.find("td") @@ -31,9 +62,3 @@ def parse(self, res): "reporter": cols[5].text }) return results - - def target_url(self): - return "{}?{}".format( - self.URL, - urllib.parse.urlencode({"search": self.query}) - ) diff --git a/analyzers/URLhaus/requirements.txt b/analyzers/URLhaus/requirements.txt index a8fdb314a..cb7532131 100644 --- a/analyzers/URLhaus/requirements.txt +++ b/analyzers/URLhaus/requirements.txt @@ -1 +1,4 @@ +cortexutils +diskcache +requests requests-html