From e6e2a5142631a7fe2b5a390ef3dff576b86e781e Mon Sep 17 00:00:00 2001 From: ninoseki Date: Sat, 9 Jun 2018 14:02:55 +0900 Subject: [PATCH 1/2] =?UTF-8?q?Switch=20`request-html=E2=80=99=20from=20`b?= =?UTF-8?q?eautifulsoup`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- analyzers/URLhaus/URLhaus.py | 14 +++--- analyzers/URLhaus/requirements.txt | 2 +- thehive-templates/URLhaus_0_1_0/long.html | 51 ++++++++++++++++++++++ thehive-templates/URLhaus_0_1_0/short.html | 3 ++ 4 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 thehive-templates/URLhaus_0_1_0/long.html create mode 100644 thehive-templates/URLhaus_0_1_0/short.html diff --git a/analyzers/URLhaus/URLhaus.py b/analyzers/URLhaus/URLhaus.py index 938eb7106..cd8d7771e 100644 --- a/analyzers/URLhaus/URLhaus.py +++ b/analyzers/URLhaus/URLhaus.py @@ -1,5 +1,5 @@ +from bs4 import BeautifulSoup from diskcache import Cache -from requests_html import HTML import requests @@ -16,7 +16,7 @@ class URLhaus: def __init__(self, query, - cache_duration=3600, + cache_duration=300, cache_root="/tmp/cortex/URLhaus"): self.URL = "https://urlhaus.abuse.ch/browse.php" self.query = query @@ -47,15 +47,15 @@ def fetch(self): def parse(self, doc): results = [] - html = HTML(html=doc) - table = html.find("table.table", first=True) - rows = table.find("tr")[1:] + soup = BeautifulSoup(doc, "html.parser") + table = soup.find("table", class_="table") + rows = table.find_all("tr")[1:] for row in rows: - cols = row.find("td") + cols = row.find_all("td") results.append({ "dateadded": cols[0].text, "malware_url": cols[1].text, - "link": cols[1].find("a", first=True).attrs.get("href"), + "link": cols[1].find("a").attrs.get("href"), "status": cols[2].text, "tags": cols[3].text.split(), "gsb": cols[4].text, diff --git a/analyzers/URLhaus/requirements.txt b/analyzers/URLhaus/requirements.txt index cb7532131..442450a28 100644 --- a/analyzers/URLhaus/requirements.txt +++ 
b/analyzers/URLhaus/requirements.txt @@ -1,4 +1,4 @@ +beautifulsoup4 cortexutils diskcache requests -requests-html diff --git a/thehive-templates/URLhaus_0_1_0/long.html b/thehive-templates/URLhaus_0_1_0/long.html new file mode 100644 index 000000000..d8fa06a14 --- /dev/null +++ b/thehive-templates/URLhaus_0_1_0/long.html @@ -0,0 +1,51 @@ +
+
+ URLhaus search results for + {{artifact.data}} +
+
+

+ No result found. +

+ + + + + + + + + + + + + + + + + + + +
Dateadded (UTC)Malware URLStatusTagsGSBReporter
{{r.dateadded}} + + {{r.malware_url}} + + {{r.status}} + {{tag}} + {{r.gsb}}{{r.reporter}}
+
+
+ + +
+
+ {{artifact.data | fang}} +
+
+
+
+ URLhaus:
+
{{content.errorMessage}}
+
+
+
diff --git a/thehive-templates/URLhaus_0_1_0/short.html b/thehive-templates/URLhaus_0_1_0/short.html new file mode 100644 index 000000000..5959ee1b9 --- /dev/null +++ b/thehive-templates/URLhaus_0_1_0/short.html @@ -0,0 +1,3 @@ + + {{t.namespace}}:{{t.predicate}}={{t.value}} + From 10d130a0ce17931c53f252214e32c6942840c1e3 Mon Sep 17 00:00:00 2001 From: ninoseki Date: Sat, 9 Jun 2018 14:06:22 +0900 Subject: [PATCH 2/2] Change the cache duration time --- analyzers/URLhaus/URLhaus.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analyzers/URLhaus/URLhaus.json b/analyzers/URLhaus/URLhaus.json index fbb18578e..c40a336a8 100644 --- a/analyzers/URLhaus/URLhaus.json +++ b/analyzers/URLhaus/URLhaus.json @@ -14,7 +14,7 @@ "type": "number", "multi": false, "required": true, - "defaultValue": 3600 + "defaultValue": 300 }, { "name": "cache.root",