From 47c14682088e9a710ab1bff42ce67356e799e68f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Leonard?=
Date: Wed, 18 Apr 2018 22:26:03 +0200
Subject: [PATCH] #212 WIP Outlook Mail parser

---
Review notes (ignored by `git am`):
 * submodule_outlook.py now imports hashlib, which analyze_file() uses.
 * Attachment payloads are hashed directly from their bytes and reported
   as a hex digest string so the result stays JSON-serialisable.
 * check_file() no longer wraps kwargs.get() in a try/except KeyError;
   dict.get() never raises KeyError.
 * The post-image blob id on the submodule_outlook.py index line was not
   regenerated after these edits.

 analyzers/FileInfo/fileinfo_analyzer.py       |  5 ++-
 analyzers/FileInfo/requirements.txt           |  2 +
 analyzers/FileInfo/submodules/__init__.py     |  4 +-
 .../FileInfo/submodules/submodule_outlook.py  | 34 ++++++++++++++++---
 4 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/analyzers/FileInfo/fileinfo_analyzer.py b/analyzers/FileInfo/fileinfo_analyzer.py
index 7d275f251..97a2c2578 100755
--- a/analyzers/FileInfo/fileinfo_analyzer.py
+++ b/analyzers/FileInfo/fileinfo_analyzer.py
@@ -13,7 +13,7 @@ def __init__(self):
         self.filepath = self.get_param('file', None, 'File parameter is missing.')
         self.filename = self.get_param('filename', None, 'Filename is missing.')
         self.filetype = pyexifinfo.fileType(self.filepath)
-        self.mimtype = magic.Magic(mime=True).from_file(path)
+        self.mimetype = magic.Magic(mime=True).from_file(self.filepath)
 
     def run(self):
         results = []
@@ -26,7 +26,8 @@ def run(self):
         })
 
         for module in available_submodules:
-            if module.check_file(file=self.filepath, filetype=self.filetype, filename=self.filename):
+            if module.check_file(file=self.filepath, filetype=self.filetype, filename=self.filename,
+                                 mimetype=self.mimetype):
                 results.append({
                     'submodule_name': module.name,
                     'results': module.analyze_file(self.filepath)
diff --git a/analyzers/FileInfo/requirements.txt b/analyzers/FileInfo/requirements.txt
index 4a6224d69..01c05ec4e 100644
--- a/analyzers/FileInfo/requirements.txt
+++ b/analyzers/FileInfo/requirements.txt
@@ -6,3 +6,5 @@ pefile
 git+https://github.com/AnyMaster/pehashng
 git+https://github.com/Rafiot/pdfid.git
 oletools
+git+https://github.com/mattgwwalker/msg-extractor.git
+IMAPClient
diff --git a/analyzers/FileInfo/submodules/__init__.py b/analyzers/FileInfo/submodules/__init__.py
index 365e7c904..152e2286b 100644
--- a/analyzers/FileInfo/submodules/__init__.py
+++ b/analyzers/FileInfo/submodules/__init__.py
@@ -1,9 +1,11 @@
 from .submodule_oletools import OLEToolsSubmodule
 from .submodule_pe import PESubmodule
 from .submodule_pdfid import PDFIDSubmodule
+from .submodule_outlook import OutlookSubmodule
 
 available_submodules = [
     PESubmodule(),
     OLEToolsSubmodule(),
-    PDFIDSubmodule()
+    PDFIDSubmodule(),
+    OutlookSubmodule()
 ]
diff --git a/analyzers/FileInfo/submodules/submodule_outlook.py b/analyzers/FileInfo/submodules/submodule_outlook.py
index a8e4edf60..900df5445 100644
--- a/analyzers/FileInfo/submodules/submodule_outlook.py
+++ b/analyzers/FileInfo/submodules/submodule_outlook.py
@@ -1,5 +1,9 @@
+import hashlib
+
 from .submodule_base import SubmoduleBaseclass
+from ExtractMsg import Message, Attachment
+from imapclient.imapclient import decode_utf7
 
 
 class OutlookSubmodule(SubmoduleBaseclass):
     """This is just for showing how to include a submodule. No real functionality here."""
@@ -9,10 +13,34 @@ def __init__(self):
         self.name = 'Outlook mail Information'
 
     def check_file(self, **kwargs):
-        if kwargs.get('filetype') == 'GZIP':
+        if kwargs.get('mimetype') == 'application/vnd.ms-outlook':
             return True
         return False
 
     def analyze_file(self, path):
-        self.add_result_subsection('TEST', {})
-        return self.resul
\ No newline at end of file
+
+        m = Message(path)
+
+        def xstr(s):
+            return '' if s is None else str(s)
+
+        attachments = m.attachments
+        a = []
+        for attachment in attachments:
+            # Attachment.data holds the raw payload bytes, not a file object.
+            sha256 = hashlib.sha256(attachment.data).hexdigest()
+            a.append({'name': attachment.longFilename,
+                      'sha256': sha256})
+
+        email = {
+            'header': xstr(m.header),
+            'from': xstr(m.sender),
+            'to': xstr(m.to),
+            'cc': xstr(m.cc),
+            'subject': xstr(m.subject),
+            'date': xstr(m.date),
+            'body': decode_utf7(m.body),
+            'attachments': a
+        }
+        self.add_result_subsection('Email details', email)
+        return self.results