Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
CyDefender committed Feb 12, 2024
0 parents commit bbd3864
Show file tree
Hide file tree
Showing 7 changed files with 335 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.DS_Store
__pycache__/
*.py[cod]
.scannerwork/
.venv/*
data/
39 changes: 39 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Base image and tag of the AssemblyLine v4 service base; both can be
# overridden with --build-arg.
ARG branch=latest
ARG base=cccs/assemblyline-v4-service-base
FROM $base:$branch

# Python path (module.Class) of the service class defined in vmray_service.py
ENV SERVICE_PATH=vmray_service.VMRayService

LABEL Name="vmray"
LABEL Version=1.1
LABEL Remarks="This is a dockerfile for vmray as an AssemblyLine service"

ARG al_version=4.4.stable
# `version` replaces the $SERVICE_TAG placeholder in service_manifest.yml
# (see the sed call below). It has to be declared as an ARG, otherwise
# $version expands to an empty string and `--build-arg version=...` is
# never consumed. It defaults to al_version so both spellings keep working.
ARG version=${al_version}

USER root

RUN apt-get update && apt-get install -y git gcc build-essential curl unzip libssl-dev && rm -rf /var/lib/apt/lists/*

# Copy site-packages (should probably turn this into a requirements.txt)
COPY site-packages.tgz /
WORKDIR /usr/local/lib/python3.9/
RUN tar xf /site-packages.tgz

# Python packages
ARG PIP_INDEX_URL=https://pypi.python.org/simple
COPY ./requirements.txt /
RUN pip install --upgrade pip
RUN pip install -r /requirements.txt --upgrade

USER assemblyline

# Copy files over
WORKDIR /opt/al_service
COPY vmray_service.py .
COPY service_manifest.yml .

# Stamp the service version into the manifest (needs root to edit in place)
USER root
RUN sed -i -e "s/\$SERVICE_TAG/$version/g" service_manifest.yml

# Switch to assemblyline user
USER assemblyline
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# VMRay service

VMRay is a malware analysis platform that uses a hypervisor-based sandbox environment to study the behavior of malicious files, URLs, and email attachments. Its agentless approach allows for a more stealthy analysis, reducing the chances of malware detecting it's in a sandbox. This results in detailed insights into malware operations, making it easier to identify threats.
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
assemblyline_v4_service
assemblyline_service_client
vmray-rest-api
108 changes: 108 additions & 0 deletions service_manifest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Name of the service
name: VMRay
# Version of the service
version: $SERVICE_TAG
# Description of the service
description: VMRay service

# Regex defining the types of files the service accepts and rejects
accepts: executable/windows/(pe32|pe64)
rejects: empty|metadata/.*

# At which stage the service should run (one of: FILTER, EXTRACT, CORE, SECONDARY, POST)
# NOTE: Stages are executed in the order defined in the list
stage: CORE
# Which category the service is part of (one of: Antivirus, Dynamic Analysis, External, Extraction, Filtering, Networking, Static Analysis)
category: Dynamic Analysis

# Does the service require access to the file to perform its task
# If set to false, the service will only have access to the file metadata (e.g. Hashes, size, type, ...)
file_required: true
# Maximum execution time the service has before it's considered to be timed out
timeout: 10
# Does the service force the caching of results to be disabled
# (only use for services that will always provide different results each run)
disable_cache: false

# is the service enabled by default
enabled: true
# does the service make API calls to other products that are not part of the assemblyline infrastructure (e.g. VirusTotal, ...)
is_external: false
# Number of concurrent services allowed to run at the same time
licence_count: 0

# Service configuration block (dictionary of config variables)
# NOTE: The key names can be anything and the value can be of any types
config:
vmray_service_url: ""
vmray_service_api_key: ""
verify_certificate: true
# Submission params block:
# A list of submission param object that define parameters
# that the user can change about the service for each of its scans
# SUPPORTED TYPES: bool, int, str, list
# submission_params:
# - default: ""
# name: password
# type: str
# value: ""
# - default: false
# name: extra_work
# type: bool
# value: false

# Service heuristic blocks: List of heuristics object that define the different heuristics used in the service
# heuristics:
# - description: This suspicious heuristic fakes making as a PDF
# filetype: "*"
# heur_id: 1
# name: Masks has PDF
# score: 10
# # Even if a signature fires multiple time, this is the max score for a section
# max_score: 1000
# - description: This malicious heuristic fakes dropping and side-loading a DLL and has an Att&ck ID associated with it
# filetype: "*"
# heur_id: 2
# name: Drops an exe
# score: 1000
# attack_id: T1073
# - description: This informational heuristic fakes extracting a configuration block
# filetype: "*"
# heur_id: 3
# name: Extraction config information
# score: 10
# # if a signature is associated to this heuristic and is present in that map it gets a different score
# signature_score_map:
# sig_three: 30
# sig_four: 40
# - description: This suspicious heuristic fakes decoding a configuration block and has an Att&ck ID associated with it
# filetype: "*"
# heur_id: 4
# name: Config decoding
# score: 100
# attack_id: [T1027, T1127]
# - description: This suspicious heuristic fakes an high entropy pe section
# filetype: "*"
# heur_id: 5
# name: High entropy PE section
# score: 100
# - description: Suspicious string found during OCR analysis
# filetype: "*"
# heur_id: 6
# name: Suspicious OCR Strings
# score: 100
# - description: Safe file detected
# filetype: "*"
# heur_id: 7
# name: Safe file detected
# score: -1000

# Docker configuration block which defines:
# - the name of the docker container that will be created
# - cpu and ram allocation by the container
docker_config:
allow_internet_access: true
image: ${REGISTRY}fbicyber/assemblyline-service-vmray:$SERVICE_TAG
cpu_cores: 1.0
ram_mb_min: 128
ram_mb: 256
Binary file added site-packages.tgz
Binary file not shown.
176 changes: 176 additions & 0 deletions vmray_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import base64
import time
from collections import defaultdict
from datetime import datetime, timedelta
from enum import Enum
from typing import List, Tuple

from assemblyline_v4_service.common.base import ServiceBase
from assemblyline_v4_service.common.result import Result, ResultJSONSection
from vmray.rest_api import VMRayRESTAPI, VMRayRESTAPIError


class VMRayService(ServiceBase):
    """AssemblyLine service that submits a sample to VMRay and reports the job results.

    The sample is uploaded through the VMRay REST API. Every job VMRay creates
    for the submission is then polled until its analysis is available or the
    service deadline is reached. Finished analyses and the status of jobs that
    did not finish in time are attached to the request result as JSON sections.
    """

    class JobType(Enum):
        """Kinds of jobs VMRay may create for a submission.

        Each value is a tuple of
        ``(key in the submit response, key of the job id inside a job entry,
        REST endpoint used to query the job status)``.
        """

        JOBS = ("jobs", "job_id", "job")  # Dynamic or Web Analysis jobs
        # MD_JOBS = ... Deprecated in VMRay
        REPUTATION_JOBS = ("reputation_jobs", "reputation_job_id", "reputation_job")
        STATIC_JOBS = ("static_jobs", "job_id", "job")  # Shares the same structure with dynamic jobs
        VT_JOBS = ("vt_jobs", "vt_job_id", "vt_job")  # VirusTotal
        # Unclear how this part of the rest API works. There don't seem to be any corresponding rest endpoints
        # WHOIS_JOBS = (...)

    VMRAY_SERVICE_URL_CONFIG_KEY: str = "vmray_service_url"
    VMRAY_SERVICE_API_KEY_CONFIG_KEY: str = "vmray_service_api_key"

    # Pause between two polling rounds, to prevent hammering the VMRay server.
    POLL_INTERVAL_SECONDS: int = 10
    # Safety margin kept before the service timeout so a status can still be returned.
    TIMEOUT_MARGIN: timedelta = timedelta(minutes=1)
    # Error text for which the REST lookups return None instead of raising.
    NOT_FOUND_MESSAGE: str = "No such element"

    def __init__(self, config=None):
        """Read the VMRay connection settings from the service configuration.

        Args:
            config: Optional configuration passed through to ``ServiceBase``.

        Raises:
            RuntimeError: If the VMRay URL or API key is missing or empty.
        """
        super().__init__(config)

        # Read from self.config rather than the ``config`` argument: the
        # argument defaults to None, and ``None.get(...)`` raises AttributeError.
        self.vmray_service_url: str = self.config.get(self.VMRAY_SERVICE_URL_CONFIG_KEY)
        self.vmray_api_key: str = self.config.get(self.VMRAY_SERVICE_API_KEY_CONFIG_KEY)
        self.verify = self.config.get("verify_certificate", True)

        if not self.vmray_service_url:
            raise RuntimeError("VMRay service URL not set in the config. Check the config section in the manifest?")

        if not self.vmray_api_key:
            raise RuntimeError("VMRay service API key not set in the config. Check the config section in the manifest?")

    def start(self):
        """Log that the service has been started."""
        self.log.info(f"start() from {self.service_attributes.name} service called")

    def execute(self, request):
        """Submit the request's file to VMRay and store the job results on the request.

        Args:
            request: Service request; ``file_path`` and ``file_name`` are read
                and ``result`` is set.

        Raises:
            Exception: If the submission fails or VMRay reports errors. The
                error is logged before it is re-raised.
        """
        # Stop polling TIMEOUT_MARGIN before the service timeout, to leave time
        # to collect and return some sort of status.
        # NOTE(review): the timeout is interpreted as minutes here; AssemblyLine
        # may define the manifest ``timeout`` in seconds - confirm the unit.
        timeout_time = (datetime.now()
                        + timedelta(minutes=self.service_attributes.timeout)
                        - self.TIMEOUT_MARGIN)

        self.log.info(f"execute() from {self.service_attributes.name} service called for '{request.file_name}'")

        try:
            api, vmray_data = self._submit_request_file(request)
            self._raise_on_submission_errors(vmray_data, request.file_name)

            submission = vmray_data["submissions"][0]
            vmray_submission_id: str = submission["submission_id"]
            vmray_submission_original_filename: str = submission["submission_original_filename"]

            running_job_ids = self._collect_job_ids(vmray_data)
            job_count = sum(len(job_ids) for job_ids in running_job_ids.values())

            self.log.info(f"VMRay created {job_count} job(s) for the submission '{vmray_submission_original_filename}' "
                          f"(vmray id: {vmray_submission_id}):")
            for job_type, job_ids in running_job_ids.items():
                self.log.info(f"{job_type.value[1]}(s): {','.join([str(job_id) for job_id in job_ids])}")

            submission_label = (f"for the submission '{vmray_submission_original_filename}' "
                                f"(vmray id: {vmray_submission_id})")
            finished_jobs, unfinished_jobs = self._poll_jobs(api, running_job_ids, timeout_time, submission_label)

            result = Result()

            finished_json_section = ResultJSONSection('VMRay Response')
            finished_json_section.set_json(finished_jobs)
            result.add_section(finished_json_section)

            if unfinished_jobs:
                self.log.warning(f"VMRay wasn't able to complete the following {len(unfinished_jobs)} of"
                                 f" {len(finished_jobs) + len(unfinished_jobs)} before the "
                                 f"~{self.service_attributes.timeout} minute timeout: {unfinished_jobs}")
                unfinished_json_section = ResultJSONSection(
                    "VMRay jobs not completed before the AssemblyLine service timeout of "
                    f" ~{self.service_attributes.timeout} minutes.")
                unfinished_json_section.set_json(unfinished_jobs)
                result.add_section(unfinished_json_section)

            self.log.debug(result)

            request.result = result
        except Exception as ex:
            self.log.error(str(ex))
            raise

    def _submit_request_file(self, request):
        """Open the request's file and submit it to VMRay; return ``(api, response)``."""
        with open(request.file_path, "rb") as sample_file_object:
            args = {
                "shareable": True,  # indicates whether the hash of the sample will be shared with VirusTotal.
                "reanalyze": True,  # indicates whether a duplicate submission will create analysis jobs
                "sample_file": sample_file_object,
                "sample_filename_b64enc": base64.b64encode(request.file_name.encode("utf-8")).decode("utf-8"),
            }

            try:
                api = VMRayRESTAPI(self.vmray_service_url,
                                   self.vmray_api_key,
                                   verify_cert=self.verify)
                vmray_data = self.submit_sample(api, args)
            except Exception as ex:
                # Chain the cause so the original traceback is not lost.
                raise Exception(f"VMRay failed to process '{request.file_name}': {str(ex)}") from ex

        return api, vmray_data

    @staticmethod
    def _raise_on_submission_errors(vmray_data, file_name: str) -> None:
        """Raise an Exception listing every error VMRay reported for the submission."""
        errors = vmray_data.get("errors")
        if not errors:
            return

        error_messages = [error["error_msg"] for error in errors]
        message = f"VMRay failed to process '{file_name}': "
        if len(error_messages) == 1:
            message += error_messages[0]
        else:
            # Keep the prefix for the multi-error case too.
            message += "\n" + "\n".join(f" - {error}" for error in error_messages)
        raise Exception(message)

    def _collect_job_ids(self, vmray_data):
        """Group the ids of the jobs listed in the submit response by job type."""
        running_job_ids = defaultdict(list)
        for job_type in self.JobType:
            job_category_key, job_id_key, _job_rest_endpoint = job_type.value
            for job in vmray_data.get(job_category_key, []):
                running_job_ids[job_type].append(job[job_id_key])
        return running_job_ids

    def _poll_jobs(self, api, running_job_ids, timeout_time: datetime,
                   submission_label: str) -> Tuple[List, List]:
        """Poll VMRay until every job has an analysis or the deadline passes.

        ``running_job_ids`` is updated in place: finished jobs are removed.

        Returns:
            ``(finished_jobs, unfinished_jobs)``: the analyses of completed
            jobs, and the last known status of the jobs still running when
            polling stopped.
        """
        finished_jobs: List = []
        # Bound up front so a submission that created no jobs still works.
        unfinished_jobs: List = []

        while running_job_ids:
            unfinished_jobs = []

            # Iterate over a snapshot of the keys; entries are deleted below.
            for job_type in list(running_job_ids):
                _job_category_key, job_id_key, job_rest_endpoint = job_type.value

                # Build a new list rather than removing from the list being
                # iterated, which would skip job ids.
                still_running = []
                for job_id in running_job_ids[job_type]:
                    analysis = self.get_job_analysis(api, job_id)
                    if analysis:
                        self.log.info(f"VMRay finished analysis for {job_rest_endpoint} ({job_id_key}: {job_id}) "
                                      f"{submission_label}")
                        finished_jobs.append(analysis)
                    else:
                        self.log.info(f"VMRay hasn't finished analysis for {job_rest_endpoint} ({job_id_key}: "
                                      f"{job_id}) {submission_label}")
                        unfinished_jobs.append(self.get_job_status(api, job_id, job_rest_endpoint))
                        still_running.append(job_id)

                if still_running:
                    running_job_ids[job_type] = still_running
                else:  # no more jobs for this job type
                    del running_job_ids[job_type]

            # Don't sleep when everything is done or the deadline has passed.
            if not running_job_ids or datetime.now() >= timeout_time:
                break

            time.sleep(self.POLL_INTERVAL_SECONDS)

        return finished_jobs, unfinished_jobs

    def submit_sample(self, api, args):
        """Submit the sample described by ``args`` to VMRay and return the response."""
        return api.call("POST", "/rest/sample/submit", args)

    def _get_or_none(self, api, path: str):
        """GET ``path``; return None when VMRay answers "No such element"."""
        try:
            return api.call("GET", path)
        except VMRayRESTAPIError as error:
            if error.args and error.args[0] == self.NOT_FOUND_MESSAGE:
                return None
            raise

    def get_job_analysis(self, api, job_id: int):
        """Return the analysis for ``job_id``, or None if VMRay has none for it."""
        return self._get_or_none(api, f"/rest/analysis/job/{job_id}")

    def get_job_status(self, api, job_id: int, endpoint: str):
        """Return the status of ``job_id`` from ``/rest/<endpoint>``, or None if it is unknown."""
        return self._get_or_none(api, f"/rest/{endpoint}/{job_id}")

0 comments on commit bbd3864

Please sign in to comment.