#212 WIP update PE and add PDFiD submodule

TheHive-Project · Mar 29, 2018 · 2ab6ec9 · 2ab6ec9
1 parent decd7d8
commit 2ab6ec9
Show file tree

Hide file tree

Showing 6 changed files with 124 additions and 23 deletions.
diff --git a/analyzers/FileInfo/fileinfo_analyzer.py b/analyzers/FileInfo/fileinfo_analyzer.py
@@ -4,7 +4,7 @@
 
 from cortexutils.analyzer import Analyzer
 from submodules import *
-
+# import submodules
 
 class FileInfoAnalyzer(Analyzer):
     def __init__(self):
@@ -15,10 +15,10 @@ def __init__(self):
         #self.auto_extract = False
 
         # Create a dictionary of custom submodules
-        self.available_submodules = [
-            GZIPSubmodule(),
-            PESubmodule()
-        ]
+        # self.available_submodules = [
+        #     GZIPSubmodule(),
+        #     PESubmodule()
+        # ]
 
     def run(self):
         results = []
@@ -30,7 +30,8 @@ def run(self):
             'results': m.analyze_file(self.filepath)
         })
 
-        for module in self.available_submodules:
+        # for module in self.available_submodules:
+        for module in AVAILABLE_SUBMODULES:
             if module.check_file(file=self.filepath, filetype=self.filetype):
                 # temporary report
                 results.append({

diff --git a/analyzers/FileInfo/requirements.txt b/analyzers/FileInfo/requirements.txt
@@ -1,3 +1,5 @@
 cortexutils
 python-magic
 ssdeep
+git+https://github.com/AnyMaster/pehashng
+git+https://github.com/Rafiot/pdfid.git
diff --git a/analyzers/FileInfo/submodules/__init__.py b/analyzers/FileInfo/submodules/__init__.py
@@ -1,3 +1,14 @@
-from .submodule_metadata import MetadataSubmodule
-from .submodule_gzip import GZIPSubmodule
-from .submodule_pe import PESubmodule
+# from .submodule_metadata import MetadataSubmodule
+# from .submodule_gzip import GZIPSubmodule
+# from .submodule_pe import PESubmodule
+
+from .submodule_metadata import *
+from .submodule_gzip import *
+from .submodule_pe import *
+from .submodule_pdfid import *
+
+
+# Submodule instances created once at import time; the analyzer iterates
+# them in this order.
+AVAILABLE_SUBMODULES = [
+    MetadataSubmodule(),
+    GZIPSubmodule(),
+    PESubmodule(),
+    PDFIDSubmodule(),
+]
diff --git a/analyzers/FileInfo/submodules/submodule_metadata.py b/analyzers/FileInfo/submodules/submodule_metadata.py
@@ -1,6 +1,7 @@
 import magic
 import hashlib
 import io
+import os
 import pyexifinfo
 
 from .submodule_base import SubmoduleBaseclass
@@ -10,7 +11,7 @@
 class MetadataSubmodule(SubmoduleBaseclass):
     def __init__(self):
         SubmoduleBaseclass.__init__(self)
-        self.name = 'Metadata'
+        self.name = 'Basic properties'
 
     def check_file(self, **kwargs):
         """
@@ -57,7 +58,8 @@ def analyze_file(self, path):
         self.add_result_subsection('Filetype determination', {
             'Magic literal': magicliteral,
             'MimeType': mimetype,
-            'Filetype': pyexifinfo.fileType(path)
+            'Filetype': pyexifinfo.fileType(path),
+            'Filesize': os.path.getsize(path)
 
         })
 

diff --git a/analyzers/FileInfo/submodules/submodule_pdfid.py b/analyzers/FileInfo/submodules/submodule_pdfid.py
@@ -0,0 +1,31 @@
+from pdfid.pdfid import *
+import optparse
+import json
+
+from .submodule_base import SubmoduleBaseclass
+
+class PDFIDSubmodule(SubmoduleBaseclass):
+    """Submodule that adds the PDFiD report of a PDF file to the results."""
+
+    def __init__(self):
+        SubmoduleBaseclass.__init__(self)
+        self.name = 'PDF Information'
+
+    def check_file(self, **kwargs):
+        """
+        Decide whether this submodule applies to the file being analyzed.
+
+        :param kwargs: keyword arguments from the analyzer; only 'filetype' is read here
+        :return: True if the filetype is 'PDF', False otherwise
+        """
+        return kwargs.get('filetype') in ['PDF']
+
+    def pdfid_cmd(self, path):
+        """
+        Run PDFiD on the file and parse its JSON output.
+
+        :param path: path of the file to scan
+        :return: the decoded PDFiD JSON report, or an error string if the scan failed
+        """
+        # NOTE(review): disarm=True is kept from the original call; confirm that any
+        # side effect of PDFiD's disarm mode is wanted for an analysis-only run.
+        try:
+            return json.loads(
+                PDFiD2JSON(PDFiD(path, allNames=True, extraData=True, disarm=True, force=True), force=True))
+        except Exception as e:
+            # Best effort: surface the failure in the report instead of printing
+            # it and storing None.
+            return 'PDFiD failed: {}'.format(e)
+
+    def analyze_file(self, path):
+        """
+        Store the PDFiD report in the 'pdfid' subsection.
+
+        :param path: path of the file to analyze
+        :return: self.results
+        """
+        self.add_result_subsection('pdfid', self.pdfid_cmd(path))
+        return self.results
diff --git a/analyzers/FileInfo/submodules/submodule_pe.py b/analyzers/FileInfo/submodules/submodule_pe.py
@@ -1,17 +1,13 @@
-import magic
-import hashlib
-import io
-import pyexifinfo
 import pefile
+import pehashng
 
 from .submodule_base import SubmoduleBaseclass
-from ssdeep import Hash
 
 
 class PESubmodule(SubmoduleBaseclass):
     def __init__(self):
         SubmoduleBaseclass.__init__(self)
-        self.name = 'PE'
+        self.name = 'PE Information'  # NOTE(review): presumably the label shown for this submodule's results; verify against SubmoduleBaseclass
 
     def check_file(self, **kwargs):
         """
@@ -22,27 +18,85 @@ def check_file(self, **kwargs):
         if kwargs.get('filetype') in ['Win32 EXE']:
             return True
 
-    def PE_info(self, pe):
+
+    def pe_machine(self, pedict):
+        """
+        Translate the FILE_HEADER machine field into a readable architecture name.
+
+        :param pedict: dictionary produced by pe.dump_dict()
+        :return: 'x86', 'Itanium' or 'x64' for the known machine codes, a
+                 '<value> => Not x86/64 or Itanium' string for anything else,
+                 or the string 'None' when pedict is empty
+        """
+        if not pedict:
+            return 'None'
+        machinetype = pedict.get('FILE_HEADER').get('Machine').get('Value')
+        # Keyed by integer so the lookup does not depend on how hex() formats
+        # the value (hex(0x200) is '0x200', which never matched '0x0200').
+        mt = {0x14c: 'x86', 0x200: 'Itanium', 0x8664: 'x64'}
+        if isinstance(machinetype, int):
+            if machinetype in mt:
+                return mt[machinetype]
+            # Unknown integer codes are reported in hex rather than raising KeyError.
+            machinetype = hex(machinetype)
+        return str(machinetype) + ' => Not x86/64 or Itanium'
+
+    def compilation_timestamp(self, pedict):
+        """
+        Read the TimeDateStamp value from the FILE_HEADER entry of pedict.
+
+        :param pedict: dictionary produced by pe.dump_dict()
+        :return: the stored 'Value', or the string 'None' when pedict is empty
+        """
+        if not pedict:
+            return 'None'
+        file_header = pedict.get('FILE_HEADER')
+        stamp = file_header.get('TimeDateStamp')
+        return stamp.get('Value')
+
+    def pe_entrypoint(self, pedict):
+        """
+        Read AddressOfEntryPoint from the OPTIONAL_HEADER entry of pedict.
+
+        :param pedict: dictionary produced by pe.dump_dict()
+        :return: the entry point 'Value' formatted with hex(), or the string
+                 'None' when pedict is empty
+        """
+        if not pedict:
+            return 'None'
+        optional_header = pedict.get('OPTIONAL_HEADER')
+        address = optional_header.get('AddressOfEntryPoint').get('Value')
+        return hex(address)
+
+    def pe_info(self, pe):
+        """
+        Build the rows of the 'Headers' table for a parsed PE file.
+
+        :param pe: pefile.PE object of the analyzed file
+        :return: list of {'Info': ..., 'Value': ...} dicts holding the
+                 StringFileInfo entries followed by compilation timestamp,
+                 target machine and entry point; the string 'None' if
+                 anything inside the try block raises
+        """
+        pedict = pe.dump_dict()
         table = []
         try:
             for fileinfo in pe.FileInfo:
                 if fileinfo.Key.decode() == 'StringFileInfo':
                     for stringtable in fileinfo.StringTable:
                         for entry in stringtable.entries.items():
                             table.append({'Info': entry[0].decode(), 'Value': entry[1].decode()})
+
+            # NOTE(review): these rows sit inside the try block, so they are
+            # dropped together with the version info whenever the loop above raises.
+            table.append({'Info': 'Compilation Timestamp',
+                          'Value': self.compilation_timestamp(pedict)})
+            table.append({'Info': 'Target machine', 'Value': self.pe_machine(pedict)})
+            table.append({'Info': 'Entry Point', 'Value': self.pe_entrypoint(pedict)})
             return table
         except Exception as excp:
             return 'None'
 
+    def pe_iat(self, pe):
+        """
+        List the imported symbols of a PE file, grouped by imported DLL.
+
+        :param pe: pefile.PE object of the analyzed file
+        :return: list of {'entryname': <dll name>, 'symbols': [<symbol names>]}
+                 dicts; empty if pe is falsy or has no import directory
+        """
+        if not pe:
+            # Previously 'table' was unbound on this path and the return raised.
+            return []
+        table = []
+        # pefile only sets DIRECTORY_ENTRY_IMPORT when the file has an import
+        # directory, so fall back to an empty list instead of raising.
+        for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
+            table.append({
+                'entryname': entry.dll.decode(),
+                # Symbols without a name (name is None) are skipped.
+                'symbols': [symbol.name.decode() for symbol in entry.imports
+                            if symbol.name is not None],
+            })
+        return table
+
+    # PE:Sections list of {Name, Size, Entropy, MD5, SHA1, SHA256, SHA512} #
+    def pe_sections(self, pe):
+        """
+        Describe every section of a PE file.
+
+        :param pe: pefile.PE object of the analyzed file
+        :return: list with one dict per section (name, raw data size as hex,
+                 entropy and MD5/SHA1/SHA256/SHA512 hashes); empty if pe is falsy
+        """
+        if not pe:
+            # Previously 'table' was unbound on this path and the return raised.
+            return []
+        return [{'entryname': str(entry.Name.decode()),
+                 'SizeOfRawData': hex(entry.SizeOfRawData),
+                 'Entropy': entry.get_entropy(),
+                 'MD5': entry.get_hash_md5(),
+                 'SHA1': entry.get_hash_sha1(),
+                 'SHA256': entry.get_hash_sha256(),
+                 'SHA512': entry.get_hash_sha512()}
+                for entry in pe.sections]
+
     def analyze_file(self, path):
+        """
+        Parse the file with pefile and add the 'Headers', 'Hashes',
+        'Import Adress Tables' and 'Sections' subsections.
+
+        :param path: path of the file to analyze
+        :return: self.results; returned early when pefile cannot parse the file
+        """
         try:
             pe = pefile.PE(path)
             pedict = pe.dump_dict()
         except Exception as excp:
             print("Failed processing {}".format(path))
+            # Without a parsed PE object the steps below would raise NameError,
+            # so stop here and return what has been collected so far.
+            return self.results
 
-        self.add_result_subsection('PE Info', {
-            "Info": self.PE_info(pe)
-        })
-
-        return self.results
+        # NOTE(review): 'impash' and 'Import Adress Tables' look like typos of
+        # 'imphash' and 'Address'; left unchanged because report consumers may
+        # already rely on these keys.
+        self.add_result_subsection('Headers', self.pe_info(pe))
+        self.add_result_subsection('Hashes', {
+                'impash': pe.get_imphash(),
+                'pehash': pehashng.pehashng(pe)
+            })
+        self.add_result_subsection('Import Adress Tables', self.pe_iat(pe))
+        self.add_result_subsection('Sections', self.pe_sections(pe))
+        return self.results