-
Notifications
You must be signed in to change notification settings - Fork 385
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added latest version of URLhaus analyzer by @ninoseki, needs refactoring.
- Loading branch information
Showing
4 changed files
with
145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
{ | ||
"name": "URLhaus", | ||
"author": "ninoseki", | ||
"license": "MIT", | ||
"url": "https://github.com/ninoseki/cortex_URLhaus_analyzer", | ||
"version": "1.0", | ||
"description": "Search domains, URLs or hashes on URLhaus.", | ||
"dataTypeList": ["domain", "url", "hash"], | ||
"command": "URLhaus/URLhaus_analyzer.py", | ||
"configurationItems": [ | ||
{ | ||
"name": "cache.duration", | ||
"description": "Define the cache duration", | ||
"type": "number", | ||
"multi": false, | ||
"required": true, | ||
"defaultValue": 3600 | ||
}, | ||
{ | ||
"name": "cache.root", | ||
"description": "Define the path to the stored data", | ||
"type": "string", | ||
"multi": false, | ||
"required": false | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from diskcache import Cache | ||
from requests_html import HTML | ||
import requests | ||
|
||
|
||
class URLhaus:
    """Simple client to query URLhaus by abuse.ch.

    The client requests the URLhaus browse page for ``query`` and scrapes
    the result table out of the returned HTML. Raw pages are optionally
    kept in an on-disk cache keyed by the query.

    :param query: domain, url or hash.
    :param cache_duration: Duration before refreshing the cache (in seconds).
                           The cache is not used if `cache_duration` is 0.
    :param cache_root: Path where to store the cached file.
    :type query: string
    :type cache_duration: int
    :type cache_root: str
    """

    # Seconds to wait for URLhaus before giving up; without a timeout a
    # stalled connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self,
                 query,
                 cache_duration=3600,
                 cache_root="/tmp/cortex/URLhaus"):
        self.URL = "https://urlhaus.abuse.ch/browse.php"
        self.query = query
        self.cache = None
        # Always defined, even when caching is disabled.
        self.cache_duration = cache_duration
        if cache_duration > 0:
            self.cache = Cache(cache_root)

    def _get_raw_data(self):
        """Return the raw page for the query, using the cache when enabled.

        :return: the page body, either cached or freshly fetched.
        """
        if self.cache is None:
            return self.fetch()

        try:
            key = self.query.encode('utf-8')
        except AttributeError:
            # The query cannot be turned into a cache key (not a text
            # string): bypass the cache instead of failing.
            return self.fetch()

        try:
            return self.cache[key]
        except KeyError:
            data = self.fetch()
            self.cache.set(key, data, expire=self.cache_duration)
            # Return the fetched value directly: reading it back from the
            # cache would fail if the entry was not retained.
            return data

    def search(self):
        """Fetch (or load from cache) the page for the query and parse it.

        :return: list of dicts, one per result row (see :meth:`parse`).
        """
        res = self._get_raw_data()
        return self.parse(res)

    def fetch(self):
        """Download the URLhaus browse page for the query.

        :return: the response body as text.
        :raises requests.RequestException: on timeout, connection failure
            or an HTTP error status, so that an error page is never
            parsed or cached as if it were a result page.
        """
        payload = {"search": self.query}
        response = requests.get(
            self.URL, params=payload, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.text

    def parse(self, doc):
        """Extract the result rows from a URLhaus browse page.

        :param doc: HTML of the page.
        :return: list of dicts with the keys ``dateadded``,
            ``malware_url``, ``link``, ``status``, ``tags``, ``gsb`` and
            ``reporter``; empty when the page holds no result table.
        """
        if not doc or not doc.strip():
            return []

        html = HTML(html=doc)
        table = html.find("table.table", first=True)
        if table is None:
            # No result table on the page: nothing to report.
            return []

        results = []
        # The first row is the table header.
        for row in table.find("tr")[1:]:
            cols = row.find("td")
            if len(cols) < 6:
                # Not a data row (e.g. a spacer or message row).
                continue
            anchor = cols[1].find("a", first=True)
            results.append({
                "dateadded": cols[0].text,
                "malware_url": cols[1].text,
                "link": anchor.attrs.get("href") if anchor is not None else None,
                "status": cols[2].text,
                "tags": cols[3].text.split(),
                "gsb": cols[4].text,
                "reporter": cols[5].text
            })
        return results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#!/usr/bin/env python3 | ||
from cortexutils.analyzer import Analyzer | ||
from URLhaus import URLhaus | ||
|
||
|
||
class URLhausAnalyzer(Analyzer):
    """Cortex analyzer that searches an observable on URLhaus."""

    def __init__(self):
        Analyzer.__init__(self)
        # Honour the cache settings declared in the analyzer descriptor
        # (``cache.duration`` / ``cache.root``).
        self.cache_duration = self.get_param('config.cache.duration', 3600)
        # ``cache.root`` is optional; fall back to the client's default path
        # when it is missing or empty.
        self.cache_root = (
            self.get_param('config.cache.root', None)
            or "/tmp/cortex/URLhaus"
        )

    def search(self, indicator):
        """
        Searches for a website using the indicator
        :param indicator: domain, url, hash
        :type indicator: str
        :return: list of result dicts produced by the URLhaus client
        """
        client = URLhaus(
            indicator,
            cache_duration=self.cache_duration,
            cache_root=self.cache_root)
        return client.search()

    def run(self):
        """Look the observable up and report the results.

        Reports an error instead of exiting silently when the observable
        is missing or its data type is not supported.
        """
        targets = ["domain", "url", "hash"]
        data = self.get_data()
        if data is not None and self.data_type in targets:
            self.report({
                'results': self.search(data)
            })
        else:
            self.error("Invalid data type: {}".format(self.data_type))

    def summary(self, raw):
        """Build the short report: one taxonomy with the number of results.

        :param raw: full report; its ``results`` entry is the result list.
        :return: dict with a single-element ``taxonomies`` list.
        """
        namespace = "URLhaus"
        predicate = "Search"

        count = len(raw["results"])
        # Any hit on URLhaus marks the observable as malicious.
        level = "malicious" if count >= 1 else "info"
        noun = "result" if count <= 1 else "results"
        value = "\"{} {}\"".format(count, noun)

        taxonomies = [
            self.build_taxonomy(level, namespace, predicate, value)
        ]
        return {"taxonomies": taxonomies}
|
||
|
||
# Script entry point: build the analyzer and run it.
if __name__ == '__main__':
    analyzer = URLhausAnalyzer()
    analyzer.run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
cortexutils | ||
diskcache | ||
requests | ||
requests-html |