diff --git a/analyzers/EmlParser/Eml_Parser.json b/analyzers/EmlParser/Eml_Parser.json
new file mode 100644
index 000000000..0d0273dde
--- /dev/null
+++ b/analyzers/EmlParser/Eml_Parser.json
@@ -0,0 +1,18 @@
+{
+    "name": "Eml_Parser",
+    "version": "1.0",
+    "author": "ninsmith",
+    "url": "https://github.com/TheHive-Project/Cortex-Analyzers",
+    "license": "AGPL-V3",
+    "baseconfig": "Eml_Parser",
+    "config": {
+        "check_tlp": false,
+        "max_tlp": 3,
+        "service": ""
+    },
+    "description": "Parse Eml message",
+    "dataTypeList": [
+        "file"
+    ],
+    "command": "EmlParser/parse.py"
+}
diff --git a/analyzers/EmlParser/parse.py b/analyzers/EmlParser/parse.py
new file mode 100755
index 000000000..a4dba65c2
--- /dev/null
+++ b/analyzers/EmlParser/parse.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+import email.parser
+import eml_parser
+from cortexutils.analyzer import Analyzer
+import magic
+import binascii
+from pprint import pprint
+
+
+class EmlParserAnalyzer(Analyzer):
+    """Cortex analyzer that parses an EML file observable into a report."""
+
+    def __init__(self):
+        Analyzer.__init__(self)
+
+        # filename of the observable
+        self.filename = self.getParam('attachment.name', 'noname.ext')
+
+        # filepath to the observable, looks like
+        # /tmp/cortex-4224850437865873235-datafile
+        self.filepath = self.getParam('file', None, 'File is missing')
+
+    def run(self):
+        """Parse the file observable and report the result."""
+        if self.data_type == 'file':
+            try:
+                parsingResult = parseEml(self.filepath)
+                self.report(parsingResult)
+            except Exception as e:
+                self.unexpectedError(e)
+        else:
+            self.notSupported()
+
+    def summary(self, raw):
+        """Build the taxonomy reporting the number of attachments."""
+        taxonomies = []
+        level = "info"
+        namespace = "EmlParser"
+        predicate = "Attachments"
+        value = "\"0\""
+
+        if "attachments" in raw:
+            value = len(raw["attachments"])
+        taxonomies.append(self.build_taxonomy(level, namespace, predicate, value))
+
+        return {"taxonomies": taxonomies}
+
+
+def parseEml(filepath):
+    """Parse the EML file at *filepath* and return a summary dict.
+
+    The result always holds the keys subject, date, receivers, displayFrom,
+    sender, topic, bcc, displayto, headers, body and attachments; fields
+    that are missing from the message are left empty.
+    """
+    result = dict()
+    result['subject'] = str()
+    result['date'] = str()
+    result['receivers'] = str()
+    result['displayFrom'] = str()
+    result['sender'] = str()
+    result['topic'] = str()
+    result['bcc'] = str()
+    result['displayto'] = str()
+    result['headers'] = str()
+    result['body'] = str()
+    result['attachments'] = list()
+
+    # read the file; decode leniently so that stray 8-bit bytes in the
+    # message cannot abort the analysis with a UnicodeDecodeError
+    with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
+        raw_eml = f.read()
+
+    # parsing the headers with the email library
+    # cause eml_parser does not provide raw headers (as far as I know)
+    hParser = email.parser.HeaderParser()
+    h = hParser.parsestr(raw_eml)
+    result['headers'] = (str(h).split('\n\n')[0])
+
+    parsed_eml = eml_parser.eml_parser.decode_email(
+        filepath, include_raw_body=True, include_attachment_data=True)
+    # parsed_eml['header'].keys() gives:
+    # dict_keys(['received_foremail', 'from', 'date', 'received_domain',
+    #            'to', 'header', 'received_ip', 'subject', 'received'])
+
+    # default to empty dicts (not strings) so that a missing section gives
+    # empty fields instead of an AttributeError on the chained .get()
+    header = parsed_eml.get('header', {})
+    raw_header = header.get('header', {})
+
+    result['subject'] = ', '.join(raw_header.get('subject', ''))
+    result['date'] = ', '.join(raw_header.get('date', ''))
+    result['receivers'] = ', '.join(header.get('to', ''))
+    result['displayFrom'] = header.get('from', '')
+    result['sender'] = ', '.join(raw_header.get('x-env-sender', ''))
+    result['topic'] = ', '.join(raw_header.get('thread-topic', ''))
+    result['bcc'] = raw_header.get('bcc', '')
+    result['displayto'] = ', '.join(raw_header.get('to', ''))
+
+    # a message without any body part must not raise IndexError/KeyError
+    body = parsed_eml.get('body') or [{}]
+    result['body'] = body[0].get('content', '')
+
+    # attachments; the 'attachment' key may be absent when there are none
+    for attachment in parsed_eml.get('attachment', []):
+        attachmentSumUp = dict()
+        attachmentSumUp['filename'] = attachment.get('filename', '')
+
+        # because of module conflict name with magic
+        # eml-parser does not provide the mime type
+        # it has to be calculated, the attachment is in base64
+        try:
+            attachmentSumUp['mime'] = magic.from_buffer(
+                binascii.a2b_base64(attachment['raw']))
+        except (KeyError, binascii.Error):
+            # keep the attachment listed even when its payload is missing
+            # or is not valid base64
+            attachmentSumUp['mime'] = ''
+        attachmentSumUp['extension'] = attachment.get('extension', '')
+
+        result['attachments'].append(attachmentSumUp)
+
+    return result
+
+
+if __name__ == '__main__':
+    EmlParserAnalyzer().run()
diff --git a/analyzers/EmlParser/requirements.txt b/analyzers/EmlParser/requirements.txt
new file mode 100644
index 000000000..08e1c11bc
--- /dev/null
+++ b/analyzers/EmlParser/requirements.txt
@@ -0,0 +1,3 @@
+cortexutils==1.2.4;python_version>='3.5'
+eml_parser==1.8;python_version>='3.5'
+python-magic==0.4.15;python_version>='3.5'
diff --git a/thehive-templates/Eml_Parser_1_0/long.html b/thehive-templates/Eml_Parser_1_0/long.html
new file mode 100644
index 000000000..1291294fc
--- /dev/null
+++ b/thehive-templates/Eml_Parser_1_0/long.html
@@ -0,0 +1,76 @@
+
Filename | +Mime Type | +Extension | +
---|---|---|
{{a.filename}} | +{{a.mime}} | +{{a.extension}} | +
{{content.headers}}+
{{content.body}}+