Skip to content

Commit

Permalink
#212 WIP update PE and add PDFiD submodule
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromeleonard committed Mar 29, 2018
1 parent decd7d8 commit 2ab6ec9
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 23 deletions.
13 changes: 7 additions & 6 deletions analyzers/FileInfo/fileinfo_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from cortexutils.analyzer import Analyzer
from submodules import *

# import submodules

class FileInfoAnalyzer(Analyzer):
def __init__(self):
Expand All @@ -15,10 +15,10 @@ def __init__(self):
#self.auto_extract = False

# Create a list of custom submodules
self.available_submodules = [
GZIPSubmodule(),
PESubmodule()
]
# self.available_submodules = [
# GZIPSubmodule(),
# PESubmodule()
# ]

def run(self):
results = []
Expand All @@ -30,7 +30,8 @@ def run(self):
'results': m.analyze_file(self.filepath)
})

for module in self.available_submodules:
# for module in self.available_submodules:
for module in AVAILABLE_SUBMODULES:
if module.check_file(file=self.filepath, filetype=self.filetype):
# temporary report
results.append({
Expand Down
2 changes: 2 additions & 0 deletions analyzers/FileInfo/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
cortexutils
python-magic
ssdeep
git+https://github.com/AnyMaster/pehashng
git+https://github.com/Rafiot/pdfid.git
17 changes: 14 additions & 3 deletions analyzers/FileInfo/submodules/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
from .submodule_metadata import MetadataSubmodule
from .submodule_gzip import GZIPSubmodule
from .submodule_pe import PESubmodule
# from .submodule_metadata import MetadataSubmodule
# from .submodule_gzip import GZIPSubmodule
# from .submodule_pe import PESubmodule

from .submodule_metadata import *
from .submodule_gzip import *
from .submodule_pe import *
from .submodule_pdfid import *


# Registry of submodule instances, created once when this package is imported.
# The analyzer walks this list in order and asks each entry, through
# check_file(), whether it applies to the file being analysed.
AVAILABLE_SUBMODULES = [
    MetadataSubmodule(),
    GZIPSubmodule(),
    PESubmodule(),
    PDFIDSubmodule(),
]
6 changes: 4 additions & 2 deletions analyzers/FileInfo/submodules/submodule_metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import magic
import hashlib
import io
import os
import pyexifinfo

from .submodule_base import SubmoduleBaseclass
Expand All @@ -10,7 +11,7 @@
class MetadataSubmodule(SubmoduleBaseclass):
def __init__(self):
    """Initialise the base submodule state and set the report section name."""
    SubmoduleBaseclass.__init__(self)
    # Only the last assignment ever took effect; the earlier 'Metadata'
    # value was overwritten immediately and has been dropped.
    self.name = 'Basic properties'

def check_file(self, **kwargs):
"""
Expand Down Expand Up @@ -57,7 +58,8 @@ def analyze_file(self, path):
self.add_result_subsection('Filetype determination', {
'Magic literal': magicliteral,
'MimeType': mimetype,
'Filetype': pyexifinfo.fileType(path)
'Filetype': pyexifinfo.fileType(path),
'Filesize': os.path.getsize(path)

})

Expand Down
31 changes: 31 additions & 0 deletions analyzers/FileInfo/submodules/submodule_pdfid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from pdfid.pdfid import *
import optparse
import json

from .submodule_base import SubmoduleBaseclass

class PDFIDSubmodule(SubmoduleBaseclass):
    """FileInfo submodule that adds a PDFiD report for PDF files."""

    def __init__(self):
        """Initialise the base submodule state and set the report section name."""
        SubmoduleBaseclass.__init__(self)
        self.name = 'PDF Information'

    def check_file(self, **kwargs):
        """
        Tell whether this submodule applies to the file being analysed.

        :param kwargs: keyword arguments; only ``filetype`` is read.
        :return: True when ``filetype`` is ``'PDF'``, False otherwise.
        """
        return kwargs.get('filetype') in ('PDF',)

    def pdfid_cmd(self, path):
        """
        Run PDFiD on ``path`` and return its report as decoded JSON data.

        :param path: location of the file to scan.
        :return: the data decoded from the PDFiD JSON report, or a string
                 describing the failure when PDFiD or the decoding raises.
        """
        # NOTE(review): disarm=True is kept from the original call. It may make
        # PDFiD write a disarmed copy next to the input file - confirm that this
        # side effect is wanted for a read-only analysis.
        try:
            report = PDFiD(path, allNames=True, extraData=True, disarm=True, force=True)
            return json.loads(PDFiD2JSON(report, force=True))
        except Exception as exc:
            # Best effort: a broken PDF must not abort the other submodules,
            # but the failure is surfaced in the result instead of on stdout.
            return 'PDFiD analysis failed: {}'.format(exc)

    def analyze_file(self, path):
        """
        Add the PDFiD report for ``path`` to this submodule's results.

        :param path: location of the file to scan.
        :return: ``self.results`` after the 'pdfid' subsection has been added.
        """
        self.add_result_subsection('pdfid', self.pdfid_cmd(path))
        return self.results
78 changes: 66 additions & 12 deletions analyzers/FileInfo/submodules/submodule_pe.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
import magic
import hashlib
import io
import pyexifinfo
import pefile
import pehashng

from .submodule_base import SubmoduleBaseclass
from ssdeep import Hash


class PESubmodule(SubmoduleBaseclass):
def __init__(self):
    """Initialise the base submodule state and set the report section name."""
    SubmoduleBaseclass.__init__(self)
    # Only the last assignment ever took effect; the earlier 'PE' value was
    # overwritten immediately and has been dropped.
    self.name = 'PE Information'

def check_file(self, **kwargs):
"""
Expand All @@ -22,27 +18,85 @@ def check_file(self, **kwargs):
if kwargs.get('filetype') in ['Win32 EXE']:
return True

def PE_info(self, pe):

def pe_machine(self, pedict):
    """
    Translate the ``Machine`` field of the file header into a readable name.

    :param pedict: nested dictionary read as
                   ``FILE_HEADER -> Machine -> Value``.
    :return: 'x86', 'x64' or 'Itanium' for the known machine types, the
             value followed by ' => Not x86/64 or Itanium' for anything
             else, or the string 'None' when ``pedict`` is empty (the same
             placeholder the sibling header helpers use).
    """
    if not pedict:
        return 'None'

    machinetype = pedict.get('FILE_HEADER').get('Machine').get('Value')
    # Keyed by integer: hex() never zero-pads, so a string key such as
    # '0x0200' could not match hex(0x200) == '0x200'.
    known_machines = {0x14c: 'x86', 0x200: 'Itanium', 0x8664: 'x64'}

    if isinstance(machinetype, int):
        if machinetype in known_machines:
            return known_machines[machinetype]
        # Unknown integer machine types are reported instead of raising.
        return hex(machinetype) + ' => Not x86/64 or Itanium'
    return str(machinetype) + ' => Not x86/64 or Itanium'

def compilation_timestamp(self, pedict):
    """
    Fetch the ``TimeDateStamp`` value of the file header.

    :param pedict: nested dictionary read as
                   ``FILE_HEADER -> TimeDateStamp -> Value``.
    :return: the stored value, or the string 'None' when ``pedict`` is empty.
    """
    if not pedict:
        return 'None'
    file_header = pedict.get('FILE_HEADER')
    stamp = file_header.get('TimeDateStamp')
    return stamp.get('Value')

def pe_entrypoint(self, pedict):
    """
    Fetch the entry point address as a hexadecimal string.

    :param pedict: nested dictionary read as
                   ``OPTIONAL_HEADER -> AddressOfEntryPoint -> Value``.
    :return: ``hex()`` of the stored value, or the string 'None' when
             ``pedict`` is empty.
    """
    if not pedict:
        return 'None'
    optional_header = pedict.get('OPTIONAL_HEADER')
    entry_point = optional_header.get('AddressOfEntryPoint')
    return hex(entry_point.get('Value'))

def pe_info(self, pe):
    """
    Build the rows of the header table for a parsed PE file.

    Rows taken from the ``StringFileInfo`` entries of ``pe.FileInfo`` come
    first, followed by the compilation timestamp, the target machine and
    the entry point.

    :param pe: parsed PE object; ``dump_dict()`` is called on it and its
               ``FileInfo`` attribute is read when present.
    :return: list of ``{'Info': ..., 'Value': ...}`` dictionaries, or the
             string 'None' when the header fields cannot be read.
    """
    pedict = pe.dump_dict()
    table = []

    # Version information is optional. A file without it (no FileInfo
    # attribute) must still get its header rows instead of losing the
    # whole table to a blanket exception handler.
    try:
        for fileinfo in getattr(pe, 'FileInfo', []):
            if fileinfo.Key.decode(errors='replace') != 'StringFileInfo':
                continue
            for stringtable in fileinfo.StringTable:
                for key, value in stringtable.entries.items():
                    # errors='replace' keeps one badly encoded string from
                    # discarding every other row.
                    table.append({'Info': key.decode(errors='replace'),
                                  'Value': value.decode(errors='replace')})
    except AttributeError:
        # FileInfo is not laid out as expected: keep the rows collected so
        # far and carry on with the header fields.
        pass

    try:
        table.append({'Info': 'Compilation Timestamp',
                      'Value': self.compilation_timestamp(pedict)})
        table.append({'Info': 'Target machine', 'Value': self.pe_machine(pedict)})
        table.append({'Info': 'Entry Point', 'Value': self.pe_entrypoint(pedict)})
    except (AttributeError, KeyError, TypeError):
        # Same placeholder as before when the dumped headers lack the
        # expected fields.
        return 'None'
    return table

def pe_iat(self, pe):
    """
    List the imported libraries and the symbol names imported from each.

    :param pe: parsed PE object; its ``DIRECTORY_ENTRY_IMPORT`` attribute is
               read when present.
    :return: list of ``{'entryname': <library>, 'symbols': [<name>, ...]}``
             dictionaries (empty when the file has no import directory), or
             None when ``pe`` is falsy.
    """
    if not pe:
        return None

    table = []
    # A file without imports has no DIRECTORY_ENTRY_IMPORT attribute at all;
    # treat that as "no imports" instead of raising AttributeError.
    for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
        # Symbols without a name are skipped.
        symbols = [symbol.name.decode()
                   for symbol in entry.imports
                   if symbol.name is not None]
        table.append({'entryname': entry.dll.decode(), 'symbols': symbols})
    return table

# PE:Sections list of {Name, Size, Entropy, MD5, SHA1, SHA256, SHA512} #
def pe_sections(self, pe):
    """
    Describe every section of a parsed PE file.

    :param pe: parsed PE object; its ``sections`` attribute is iterated.
    :return: list with one dictionary per section holding the name, the raw
             size in hexadecimal, the entropy and the MD5/SHA1/SHA256/SHA512
             digests, or None when ``pe`` is falsy.
    """
    if not pe:
        return None

    table = []
    for entry in pe.sections:
        # Section names are fixed-width and NUL padded, so the padding is
        # stripped; errors='replace' keeps non-UTF-8 names from raising.
        name = entry.Name.decode(errors='replace').rstrip('\x00')
        table.append({
            'entryname': name,
            'SizeOfRawData': hex(entry.SizeOfRawData),
            'Entropy': entry.get_entropy(),
            'MD5': entry.get_hash_md5(),
            'SHA1': entry.get_hash_sha1(),
            'SHA256': entry.get_hash_sha256(),
            'SHA512': entry.get_hash_sha512(),
        })
    return table

def analyze_file(self, path):
    """
    Parse ``path`` as a PE file and add the PE subsections to the results.

    Adds, in order, the 'Headers', 'Hashes', 'Import Adress Tables' and
    'Sections' subsections.

    :param path: location of the file to analyse.
    :return: ``self.results``; left unchanged when the file cannot be parsed.
    """
    try:
        pe = pefile.PE(path)
    except Exception:
        # Best effort: an unparsable file must not abort the analysis. Return
        # here, because every step below needs the parsed object and would
        # otherwise fail on the unbound name.
        print("Failed processing {}".format(path))
        return self.results

    self.add_result_subsection('Headers', self.pe_info(pe))
    # NOTE(review): 'impash' and 'Import Adress Tables' are misspelled but are
    # kept byte-for-byte, since report consumers may look them up by name.
    self.add_result_subsection('Hashes', {
        'impash': pe.get_imphash(),
        'pehash': pehashng.pehashng(pe)
    })
    self.add_result_subsection('Import Adress Tables', self.pe_iat(pe))
    self.add_result_subsection('Sections', self.pe_sections(pe))
    return self.results

0 comments on commit 2ab6ec9

Please sign in to comment.