forked from fbicyber/assemblyline-service-vmray
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit bbd3864
Showing
7 changed files
with
335 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
.DS_Store | ||
__pycache__/ | ||
*.py[cod] | ||
.scannerwork/ | ||
.venv/* | ||
data/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Image for running VMRay as an AssemblyLine v4 service.
ARG branch=latest
ARG base=cccs/assemblyline-v4-service-base
FROM $base:$branch

# Python path of the service class that the AssemblyLine service runner loads
ENV SERVICE_PATH vmray_service.VMRayService

LABEL Name="vmray"
LABEL Version=1.1
LABEL Remarks="This is a dockerfile for vmray as an AssemblyLine service"

ARG al_version=4.4.stable
# Service version substituted for $SERVICE_TAG in service_manifest.yml (see the sed below).
# It was previously never declared, so the sed replaced $SERVICE_TAG with an empty string.
# When not passed with --build-arg, the sed falls back to al_version.
ARG version

USER root

RUN apt-get update && apt-get install -y git gcc build-essential curl unzip libssl-dev && rm -rf /var/lib/apt/lists/*

# Copy site-packages (should probably turn this into a requirements.txt)
COPY site-packages.tgz /
WORKDIR /usr/local/lib/python3.9/
RUN tar xf /site-packages.tgz

# Python packages
ARG PIP_INDEX_URL=https://pypi.python.org/simple
COPY ./requirements.txt /
RUN pip install --upgrade pip
RUN pip install -r /requirements.txt --upgrade

USER assemblyline

# Copy files over
WORKDIR /opt/al_service
COPY vmray_service.py .
COPY service_manifest.yml .

# Root is needed to edit the manifest in place
USER root
RUN sed -i -e "s/\$SERVICE_TAG/${version:-$al_version}/g" service_manifest.yml

# Switch to assemblyline user
USER assemblyline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# VMRay service | ||
|
||
VMRay is a malware analysis platform that uses a hypervisor-based sandbox environment to study the behavior of malicious files, URLs, and email attachments. Its agentless approach allows for a more stealthy analysis, reducing the chances of malware detecting it's in a sandbox. This results in detailed insights into malware operations, making it easier to identify threats. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
assemblyline_v4_service | ||
assemblyline_service_client | ||
vmray-rest-api |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# Name of the service | ||
name: VMRay | ||
# Version of the service | ||
version: $SERVICE_TAG | ||
# Description of the service | ||
description: VMRay service | ||
|
||
# Regex defining the types of files the service accepts and rejects | ||
accepts: executable/windows/(pe32|pe64) | ||
rejects: empty|metadata/.* | ||
|
||
# At which stage the service should run (one of: FILTER, EXTRACT, CORE, SECONDARY, POST) | ||
# NOTE: Stages are executed in the order defined in the list | ||
stage: CORE | ||
# Which category the service is part of (one of: Antivirus, Dynamic Analysis, External, Extraction, Filtering, Networking, Static Analysis) | ||
category: Dynamic Analysis | ||
|
||
# Does the service require access to the file to perform its task | ||
# If set to false, the service will only have access to the file metadata (e.g. Hashes, size, type, ...) | ||
file_required: true | ||
# Maximum execution time the service has before it's considered to be timed out | ||
timeout: 10 | ||
# Does the service force the caching of results to be disabled | ||
# (only use for services that will always provide different results each run) | ||
disable_cache: false | ||
|
||
# is the service enabled by default | ||
enabled: true | ||
# does the service make API calls to other products that are not part of the assemblyline infrastructure (e.g. VirusTotal, ...) | ||
is_external: false | ||
# Number of concurrent services allowed to run at the same time | ||
licence_count: 0 | ||
|
||
# Service configuration block (dictionary of config variables) | ||
# NOTE: The key names can be anything and the value can be of any types | ||
config: | ||
vmray_service_url: "" | ||
vmray_service_api_key: "" | ||
verify_certificate: true | ||
# Submission params block: | ||
# A list of submission param object that define parameters | ||
# that the user can change about the service for each of its scans | ||
# SUPPORTED TYPES: bool, int, str, list | ||
# submission_params: | ||
# - default: "" | ||
# name: password | ||
# type: str | ||
# value: "" | ||
# - default: false | ||
# name: extra_work | ||
# type: bool | ||
# value: false | ||
|
||
# Service heuristic blocks: List of heuristics object that define the different heuristics used in the service | ||
# heuristics: | ||
# - description: This suspicious heuristic fakes making as a PDF | ||
# filetype: "*" | ||
# heur_id: 1 | ||
# name: Masks has PDF | ||
# score: 10 | ||
# # Even if a signature fires multiple time, this is the max score for a section | ||
# max_score: 1000 | ||
# - description: This malicious heuristic fakes dropping and side-loading a DLL and has an Att&ck ID associated with it | ||
# filetype: "*" | ||
# heur_id: 2 | ||
# name: Drops an exe | ||
# score: 1000 | ||
# attack_id: T1073 | ||
# - description: This informational heuristic fakes extracting a configuration block | ||
# filetype: "*" | ||
# heur_id: 3 | ||
# name: Extraction config information | ||
# score: 10 | ||
# # if a signature is associated to this heuristic and is present in that map it gets a different score | ||
# signature_score_map: | ||
# sig_three: 30 | ||
# sig_four: 40 | ||
# - description: This suspicious heuristic fakes decoding a configuration block and has an Att&ck ID associated with it | ||
# filetype: "*" | ||
# heur_id: 4 | ||
# name: Config decoding | ||
# score: 100 | ||
# attack_id: [T1027, T1127] | ||
# - description: This suspicious heuristic fakes an high entropy pe section | ||
# filetype: "*" | ||
# heur_id: 5 | ||
# name: High entropy PE section | ||
# score: 100 | ||
# - description: Suspicious string found during OCR analysis | ||
# filetype: "*" | ||
# heur_id: 6 | ||
# name: Suspicious OCR Strings | ||
# score: 100 | ||
# - description: Safe file detected | ||
# filetype: "*" | ||
# heur_id: 7 | ||
# name: Safe file detected | ||
# score: -1000 | ||
|
||
# Docker configuration block which defines: | ||
# - the name of the docker container that will be created | ||
# - cpu and ram allocation by the container | ||
docker_config: | ||
allow_internet_access: true | ||
image: ${REGISTRY}fbicyber/assemblyline-service-vmray:$SERVICE_TAG | ||
cpu_cores: 1.0 | ||
ram_mb_min: 128 | ||
ram_mb: 256 |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
import base64 | ||
import time | ||
from collections import defaultdict | ||
from datetime import datetime, timedelta | ||
from enum import Enum | ||
from typing import List, Tuple | ||
|
||
from assemblyline_v4_service.common.base import ServiceBase | ||
from assemblyline_v4_service.common.result import Result, ResultJSONSection | ||
from vmray.rest_api import VMRayRESTAPI, VMRayRESTAPIError | ||
|
||
|
||
class VMRayService(ServiceBase):
    """AssemblyLine service that submits a sample to VMRay and reports the resulting jobs.

    The service uploads the file of each request to the VMRay REST API, polls the
    jobs VMRay created for the submission, and stores the collected analyses (and
    the status of any job that did not finish in time) as JSON result sections.
    """

    class JobType(Enum):
        """Job categories looked up in the VMRay submission response.

        Each value is a tuple of ``(key of the job list in the submission response,
        key of the job id inside a job entry, REST endpoint used to query the job)``.
        """

        JOBS = ("jobs", "job_id", "job")  # Dynamic or Web Analysis jobs
        # MD_JOBS = ... Deprecated in VMRay
        REPUTATION_JOBS = ("reputation_jobs", "reputation_job_id", "reputation_job")
        STATIC_JOBS = ("static_jobs", "job_id", "job")  # Shares the same structure with dynamic jobs
        VT_JOBS = ("vt_jobs", "vt_job_id", "vt_job")  # VirusTotal
        # Unclear how this part of the rest API works. There don't seem to be any corresponding rest endpoints
        # WHOIS_JOBS = (...)

    VMRAY_SERVICE_URL_CONFIG_KEY: str = "vmray_service_url"
    VMRAY_SERVICE_API_KEY_CONFIG_KEY: str = "vmray_service_api_key"

    # Seconds to wait between two polling rounds to prevent hammering the server
    POLL_INTERVAL_SECONDS: int = 10

    def __init__(self, config=None):
        """Read the VMRay connection settings from the service configuration.

        Args:
            config: Optional configuration passed through to ``ServiceBase``.

        Raises:
            RuntimeError: If the VMRay URL or API key is missing from the configuration.
        """
        super(VMRayService, self).__init__(config)

        # Read everything from self.config: the ``config`` argument defaults to None,
        # so calling ``config.get`` directly fails when no override is passed.
        self.vmray_service_url: str = self.config.get(self.VMRAY_SERVICE_URL_CONFIG_KEY)
        self.vmray_api_key: str = self.config.get(self.VMRAY_SERVICE_API_KEY_CONFIG_KEY)
        self.verify = self.config.get("verify_certificate", True)

        if not self.vmray_service_url:
            raise RuntimeError("VMRay service URL not set in the config. Check the config section in the manifest?")

        if not self.vmray_api_key:
            raise RuntimeError("VMRay service API key not set in the config. Check the config section in the manifest?")

    def start(self):
        """Log that the service has been started."""
        self.log.info(f"start() from {self.service_attributes.name} service called")

    def execute(self, request):
        """Submit the request's file to VMRay and store the job results on the request.

        Args:
            request: Service request; ``file_path`` and ``file_name`` are read and
                ``result`` is assigned.

        Raises:
            Exception: If the submission fails or VMRay reports errors for it. Any
                exception is logged before it is re-raised.
        """
        # Stop polling one minute before the service timeout to leave a margin to
        # collect and return some sort of status.
        # NOTE(review): this treats service_attributes.timeout as minutes (as do the
        # messages below); confirm against the AssemblyLine manifest documentation,
        # which may define the timeout in seconds.
        timeout_time = datetime.now() + timedelta(minutes=self.service_attributes.timeout) - timedelta(minutes=1)

        self.log.info(f"execute() from {self.service_attributes.name} service called for '{request.file_name}'")

        args = {
            "shareable": True,  # indicates whether the hash of the sample will be shared with VirusTotal.
            "reanalyze": True,  # indicates whether a duplicate submission will create analysis jobs
        }

        try:
            # The submission must happen while the sample file is still open.
            with open(request.file_path, "rb") as sample_file_object:
                args["sample_file"] = sample_file_object
                args["sample_filename_b64enc"] = base64.b64encode(request.file_name.encode("utf-8")).decode("utf-8")

                try:
                    api = VMRayRESTAPI(self.vmray_service_url,
                                       self.vmray_api_key,
                                       verify_cert=self.verify)

                    vmray_data = self.submit_sample(api, args)
                except Exception as ex:
                    raise Exception(f"VMRay failed to process '{request.file_name}': {str(ex)}") from ex

            errors = vmray_data.get("errors")
            if errors:
                raise Exception(self._format_submission_errors(request.file_name, errors))

            vmray_submission_id: str = vmray_data["submissions"][0]["submission_id"]
            vmray_submission_original_filename: str = vmray_data["submissions"][0]["submission_original_filename"]

            running_job_ids = self._collect_job_ids(vmray_data)
            job_count: int = sum(len(job_ids) for job_ids in running_job_ids.values())

            self.log.info(f"VMRay created {job_count} job(s) for the submission '{vmray_submission_original_filename}' "
                          f"(vmray id: {vmray_submission_id}):")
            for job_type, job_ids in running_job_ids.items():
                self.log.info(f"{job_type.value[1]}(s): {','.join(str(job_id) for job_id in job_ids)}")

            finished_jobs, unfinished_jobs = self._poll_jobs(
                api, running_job_ids, timeout_time, vmray_submission_original_filename, vmray_submission_id)

            result = Result()

            finished_json_section = ResultJSONSection('VMRay Response')
            finished_json_section.set_json(finished_jobs)
            result.add_section(finished_json_section)

            if unfinished_jobs:
                self.log.warning(f"VMRay wasn't able to complete the following {len(unfinished_jobs)} of"
                                 f" {len(finished_jobs) + len(unfinished_jobs)} before the "
                                 f"~{self.service_attributes.timeout} minute timeout: {unfinished_jobs}")
                unfinished_json_section = ResultJSONSection(
                    "VMRay jobs not completed before the AssemblyLine service timeout of "
                    f" ~{self.service_attributes.timeout} minutes.")
                unfinished_json_section.set_json(unfinished_jobs)
                result.add_section(unfinished_json_section)

            self.log.debug(result)

            request.result = result
        except Exception as ex:
            self.log.error(str(ex))
            raise

    @staticmethod
    def _format_submission_errors(file_name: str, errors) -> str:
        """Build one failure message from the ``errors`` list of a submission response."""
        messages = [error["error_msg"] for error in errors]
        prefix = f"VMRay failed to process '{file_name}': "
        if len(messages) == 1:
            return prefix + messages[0]
        # Keep the prefix for several errors too and list them one per line.
        return prefix + "\n" + "\n".join(f" - {message}" for message in messages)

    def _collect_job_ids(self, vmray_data):
        """Group the ids of the jobs listed in a submission response by job type."""
        running_job_ids = defaultdict(list)
        for job_type in self.JobType:
            job_category_key, job_id_key, _job_rest_endpoint = job_type.value
            for job in vmray_data.get(job_category_key) or []:
                running_job_ids[job_type].append(job[job_id_key])
        return running_job_ids

    def _poll_jobs(self, api, running_job_ids, timeout_time, submission_name, submission_id):
        """Poll VMRay until every job has an analysis or ``timeout_time`` is reached.

        Args:
            api: VMRay REST API client used for the calls.
            running_job_ids: Mapping of job type to the ids still being analysed;
                entries are updated and removed as jobs finish.
            timeout_time: Point in time after which polling stops.
            submission_name: Original file name of the submission (for log messages).
            submission_id: VMRay id of the submission (for log messages).

        Returns:
            A tuple ``(finished_jobs, unfinished_jobs)``: the analyses of the finished
            jobs and the statuses, from the last polling round, of the jobs still running.
        """
        finished_jobs = []
        unfinished_jobs = []  # defined up front so it exists even when VMRay created no job

        while running_job_ids:
            unfinished_jobs = []  # only the statuses of the current round are reported

            # Iterate over a snapshot so the mapping can be modified inside the loop
            current_jobs: List[Tuple[VMRayService.JobType, List[int]]] = list(running_job_ids.items())

            for job_type, job_ids in current_jobs:
                _job_category_key, job_id_key, job_rest_endpoint = job_type.value

                # Build a new list instead of removing from job_ids while iterating it
                still_running: List[int] = []

                for job_id in job_ids:
                    analysis = self.get_job_analysis(api, job_id)
                    if analysis:
                        self.log.info(f"VMRay finished analysis for {job_rest_endpoint} ({job_id_key}: {job_id}) "
                                      f"for the submission '{submission_name}' (vmray id: "
                                      f"{submission_id})")
                        finished_jobs.append(analysis)
                    else:
                        self.log.info(f"VMRay hasn't finished analysis for {job_rest_endpoint} ({job_id_key}: "
                                      f"{job_id}) for the submission '{submission_name}' (vmray "
                                      f"id: {submission_id})")
                        unfinished_jobs.append(self.get_job_status(api, job_id, job_rest_endpoint))
                        still_running.append(job_id)

                if still_running:
                    running_job_ids[job_type] = still_running
                else:  # no more jobs for this job type
                    del running_job_ids[job_type]

            # Done, or out of time: leave without the extra wait
            if not running_job_ids or datetime.now() >= timeout_time:
                break

            time.sleep(self.POLL_INTERVAL_SECONDS)

        return finished_jobs, unfinished_jobs

    def submit_sample(self, api, args):
        """Submit the sample to VMRay and return the response of the REST call."""
        return api.call("POST", "/rest/sample/submit", args)

    @staticmethod
    def _get_or_none(api, path: str):
        """GET ``path`` and return the response, or None when VMRay reports "No such element"."""
        try:
            return api.call("GET", path)
        except VMRayRESTAPIError as error:
            if error.args and error.args[0] == "No such element":
                return None
            raise

    def get_job_analysis(self, api, job_id: int):
        """Return the analysis data of a job, or None if VMRay has no such element."""
        return self._get_or_none(api, f"/rest/analysis/job/{job_id}")

    def get_job_status(self, api, job_id: int, endpoint: str):
        """Return the job data from the given REST endpoint, or None if VMRay has no such element."""
        return self._get_or_none(api, f"/rest/{endpoint}/{job_id}")