Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add URLHaus analyzer #271

Merged
merged 7 commits into from
Jul 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions analyzers/URLhaus/URLhaus.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"name": "URLhaus",
"author": "ninoseki",
"license": "MIT",
"url": "https://github.com/ninoseki/cortex_URLhaus_analyzer",
"version": "1.0",
"description": "Search domains, URLs or hashes on URLhaus.",
"dataTypeList": ["domain", "url", "hash"],
"command": "URLhaus/URLhaus_analyzer.py",
"configurationItems": [
{
"name": "cache.duration",
"description": "Define the cache duration",
"type": "number",
"multi": false,
"required": true,
"defaultValue": 300
},
{
"name": "cache.root",
"description": "Define the path to the stored data",
"type": "string",
"multi": false,
"required": false
}
]
}
64 changes: 64 additions & 0 deletions analyzers/URLhaus/URLhaus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from bs4 import BeautifulSoup
from diskcache import Cache
import requests


class URLhaus:
    """Simple client to query URLhaus by abuse.ch.

    The client requests the URLhaus browse page for ``query`` and scrapes
    the HTML result table. Raw pages can be kept in an on-disk cache so
    repeated look-ups of the same indicator do not hit the site again.

    :param query: domain, url or hash.
    :param cache_duration: Duration before refreshing the cache (in seconds).
                           Caching is disabled if `cache_duration` is 0
                           (or negative / ``None``).
    :param cache_root: Path where to store the cached file.
    :type query: string
    :type cache_duration: int
    :type cache_root: str
    """

    # Upper bound (in seconds) for the HTTP request so that an unresponsive
    # server cannot hang the caller forever.
    REQUEST_TIMEOUT = 30

    def __init__(self,
                 query,
                 cache_duration=300,
                 cache_root="/tmp/cortex/URLhaus"):
        self.URL = "https://urlhaus.abuse.ch/browse.php"
        self.query = query
        # ``self.cache`` stays None when caching is disabled.
        self.cache = None
        if cache_duration and cache_duration > 0:
            self.cache = Cache(cache_root)
            self.cache_duration = cache_duration

    def _get_raw_data(self):
        """Return the raw HTML for the query, using the cache when enabled.

        :return: HTML document as returned by :meth:`fetch`.
        """
        # Explicit check instead of catching AttributeError/TypeError, which
        # could also hide unrelated programming errors.
        if self.cache is None:
            return self.fetch()

        key = self.query.encode('utf-8')
        try:
            return self.cache[key]
        except KeyError:
            data = self.fetch()
            self.cache.set(key, data, expire=self.cache_duration)
            # Return the freshly fetched document directly: reading it back
            # from the cache could fail if the entry already expired or was
            # evicted.
            return data

    def search(self):
        """Fetch (or load from cache) the result page and parse it.

        :return: list of result dicts, see :meth:`parse`.
        """
        res = self._get_raw_data()
        return self.parse(res)

    def fetch(self):
        """Download the URLhaus browse page for the query.

        :return: body of the HTTP response as text.
        :raises requests.RequestException: on network errors, timeouts or
            non-successful HTTP status codes.
        """
        payload = {"search": self.query}
        response = requests.get(
            self.URL, params=payload, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors so that an error page is neither parsed
        # as "no results" nor stored in the cache.
        response.raise_for_status()
        return response.text

    def parse(self, doc):
        """Extract the result rows from a URLhaus browse page.

        :param doc: HTML document to parse.
        :type doc: str
        :return: list of dicts with the keys ``dateadded``, ``malware_url``,
                 ``link``, ``status``, ``tags``, ``gsb`` and ``reporter``.
                 Empty if the page contains no result table.
        """
        results = []
        soup = BeautifulSoup(doc, "html.parser")
        table = soup.find("table", class_="table")
        if table is None:
            # No result table in the document: nothing to report.
            return results

        # The first row is skipped (presumably the table header).
        for row in table.find_all("tr")[1:]:
            cols = row.find_all("td")
            if len(cols) < 6:
                # Not a regular result row (e.g. a placeholder row).
                continue
            anchor = cols[1].find("a")
            results.append({
                "dateadded": cols[0].text,
                "malware_url": cols[1].text,
                "link": anchor.attrs.get("href") if anchor is not None else None,
                "status": cols[2].text,
                "tags": cols[3].text.split(),
                "gsb": cols[4].text,
                "reporter": cols[5].text
            })
        return results
50 changes: 50 additions & 0 deletions analyzers/URLhaus/URLhaus_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python3
from cortexutils.analyzer import Analyzer
from URLhaus import URLhaus


class URLhausAnalyzer(Analyzer):
    """Cortex analyzer that looks up a domain, URL or hash on URLhaus."""

    def __init__(self):
        Analyzer.__init__(self)
        # Honour the cache settings declared in the analyzer's configuration
        # items; fall back to the URLhaus client defaults when unset.
        self.cache_duration = self.get_param('config.cache.duration', 300)
        self.cache_root = self.get_param(
            'config.cache.root', '/tmp/cortex/URLhaus')

    def search(self, indicator):
        """
        Searches for a website using the indicator
        :param indicator: domain, url, hash
        :type indicator: str
        :return: list of result dicts as produced by ``URLhaus.search``
        """
        client = URLhaus(
            indicator,
            cache_duration=self.cache_duration,
            cache_root=self.cache_root)
        return client.search()

    def run(self):
        """Query URLhaus for the observable and report the results.

        Unsupported data types, missing data and look-up failures are
        reported through ``self.error`` instead of ending silently.
        """
        targets = ["domain", "url", "hash"]
        data = self.get_data()
        if data is None or self.data_type not in targets:
            self.error("Invalid data type or missing data.")
        else:
            try:
                results = self.search(data)
            except Exception as e:
                # Top-level boundary: surface the failure to Cortex.
                self.error("URLhaus search failed: {}".format(e))
            else:
                self.report({'results': results})

    def summary(self, raw):
        """Build the short report taxonomy from the full report.

        :param raw: full report; ``raw["results"]`` holds the result list.
        :type raw: dict
        :return: dict with a single-entry ``taxonomies`` list.
        """
        namespace = "URLhaus"
        predicate = "Search"

        count = len(raw["results"])
        # Any hit on URLhaus is reported as malicious.
        level = "malicious" if count >= 1 else "info"
        noun = "result" if count <= 1 else "results"
        value = "\"{} {}\"".format(count, noun)

        taxonomies = [
            self.build_taxonomy(level, namespace, predicate, value)
        ]
        return {"taxonomies": taxonomies}


# Script entry point: instantiate the analyzer and run it.
if __name__ == "__main__":
    analyzer = URLhausAnalyzer()
    analyzer.run()
4 changes: 4 additions & 0 deletions analyzers/URLhaus/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
beautifulsoup4
cortexutils
diskcache
requests
51 changes: 51 additions & 0 deletions thehive-templates/URLhaus_1_0/long.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<div class="panel panel-info" ng-if="success">
<div class="panel-heading">
URLhaus search results for
<strong>{{artifact.data}}</strong>
</div>
<div class="panel-body">
<p ng-if="content.results.length == 0">
No result found.
</p>
<table class="table" ng-if="content.results">
<thead>
<th>Dateadded (UTC)</th>
<th>Malware URL</th>
<th>Status</th>
<th>Tags</th>
<th>GSB</th>
<th>Reporter</th>
</thead>
<tbody ng-repeat="r in content.results">
<tr>
<td>{{r.dateadded}}</td>
<td>
<a href="https://urlhaus.abuse.ch{{r.link}}" target="_blank">
{{r.malware_url}}
</a>
</td>
<td><span class="label" ng-class="{'label-danger': r.status==='Online', 'label-success': r.status!=='Online'}">{{r.status}}</span></td>
<td>
<span ng-repeat="tag in r.tags"> <span class="label label-info">{{tag}}</span></span>
</td>
<td><span class="label" ng-class="{'label-success': r.gsb === 'Clean', 'label-danger': r.gsb !== 'Clean'}">{{r.gsb}}</span></td>
<td><a target="_blank" href="https://twitter.com/{{r.reporter.substring(1)}}">{{r.reporter}}</a></td>
</tr>
</tbody>
</table>
</div>
</div>

<!-- General error -->
<div class="panel panel-danger" ng-if="!success">
<div class="panel-heading">
<strong>{{artifact.data | fang}}</strong>
</div>
<div class="panel-body">
<dl class="dl-horizontal" ng-if="content.errorMessage">
<dt>
<i class="fa fa-warning"></i> URLhaus: </dt>
<dd class="wrap">{{content.errorMessage}}</dd>
</dl>
</div>
</div>
3 changes: 3 additions & 0 deletions thehive-templates/URLhaus_1_0/short.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<span class="label" ng-repeat="t in content.taxonomies" ng-class="{'info': 'label-info', 'safe': 'label-success', 'suspicious': 'label-warning', 'malicious':'label-danger'}[t.level]">
{{t.namespace}}:{{t.predicate}}={{t.value}}
</span>