Skip to content

Commit

Permalink
Fixes #169: Bug in automatic artifact extraction
Browse files Browse the repository at this point in the history
* Quickfix for #169: filter input from artifacts, only allow letters for tld part of domains

* Forgot to clean-up. :)

* Quickfix for #169: added same regex change for fqdn detection

* Bump version
  • Loading branch information
3c7 authored Feb 28, 2018
1 parent 0c6e99c commit 938790c
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 7 deletions.
3 changes: 1 addition & 2 deletions contrib/cortexutils/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python
# encoding: utf-8

import os
import sys
import codecs
Expand Down Expand Up @@ -154,7 +153,7 @@ def summary(self, raw):
def artifacts(self, raw):
# Use the regex extractor, if auto_extract setting is not False
if self.auto_extract:
extractor = Extractor()
extractor = Extractor(ignore=self.get_data())
return extractor.check_iterable(raw)

# Return empty list
Expand Down
13 changes: 10 additions & 3 deletions contrib/cortexutils/extractor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python
from builtins import str as unicode

import re


Expand All @@ -11,9 +12,13 @@ class Extractor:
Currently, this is not a full-text search, so the IOCs must be isolated strings in order to be found.
This can be iterated for ioc's.
:param ignore: List of strings or a single string to ignore when matching artifacts to type
:type ignore: list, str
"""

def __init__(self):
def __init__(self, ignore=None):
self.ignore = ignore
self.regex = self.__init_regex()

@staticmethod
Expand Down Expand Up @@ -65,7 +70,7 @@ def __init_regex():
# domain
regex.append({
'type': 'domain',
'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.\w+$')
'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.[a-zA-Z]+$')
})

# hash
Expand Down Expand Up @@ -103,7 +108,7 @@ def __init_regex():
# fqdn
regex.append({
'type': 'fqdn',
'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-\.]+\.[\w\-]+\.\w+$')
'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-\.]+\.[\w\-]+\.[a-zA-Z]+$')
})

return regex
Expand All @@ -116,6 +121,8 @@ def __checktype(self, value):
:return: Data type of value, if known, else empty string
:rtype: str
"""
if self.ignore and value in self.ignore:
return ''

if isinstance(value, (str, unicode)):
for r in self.regex:
Expand Down
2 changes: 1 addition & 1 deletion contrib/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='cortexutils',
version='1.2.0',
version='1.2.1',
description='A Python library for including utility classes for Cortex analyzers',
long_description=open('README').read(),
author='TheHive-Project',
Expand Down
2 changes: 1 addition & 1 deletion contrib/tests/test_suite_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def setUp(self):
load_test_fixture('fixtures/test-report-response.json')
self.analyzer = Analyzer()

def test_error_response(self):
def test_report_response(self):
# Run the analyzer report method
self.analyzer.report({'report_id':'12345'})

Expand Down
14 changes: 14 additions & 0 deletions contrib/tests/test_suite_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,17 @@ def test_iterable(self):
l_expected,
'Check_iterable: wrong list returned.'
)

def test_float_domain(self):
self.assertEqual(
self.extractor.check_string(value='0.001234'),
'',
'Check_float: float was recognized as domain, but should not.'
)

def test_float_fqdn(self):
self.assertEqual(
self.extractor.check_string(value='0.1234.5678'),
'',
'Check_float_fqdn: float was recognized as fqdn but should not.'
)
35 changes: 35 additions & 0 deletions contrib/tests/test_suite_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python
# coding: utf-8
import json
import unittest
import sys

from cortexutils.analyzer import Analyzer

# Different lib when using python3 or 2
if sys.version_info >= (3, 0):
from io import StringIO
else:
from StringIO import StringIO

class AnalyzerExtractorOutputTest(unittest.TestCase):
def setUp(self):
sys.stdin = StringIO(json.dumps({
"data": "8.8.8.8",
"dataType": "ip"
}))
sys.stdout = StringIO()
self.analyzer = Analyzer()

def test_output(self):
# Run the report method
self.analyzer.report({'result': '1.2.3.4'})

# Grab the output
output = self.analyzer.fpoutput.getvalue().strip()
json_output = json.loads(output)

# Checks
self.assertNotIn(self.analyzer.get_data(), output)
self.assertEqual(json_output['artifacts'][0]['value'], '1.2.3.4')
self.assertEqual(json_output['artifacts'][0]['type'], 'ip')

0 comments on commit 938790c

Please sign in to comment.