diff --git a/assemblyline/odm/models/result.py b/assemblyline/odm/models/result.py index 762326c7e..7dd41edfd 100644 --- a/assemblyline/odm/models/result.py +++ b/assemblyline/odm/models/result.py @@ -32,112 +32,109 @@ constants = forge.get_constants() -@odm.model(index=True, store=False) +@odm.model(index=True, store=False, description="Represents cyber attack tactics or techniques as identified by the MITRE ATT&CK framework.") class Attack(odm.Model): - attack_id = odm.Keyword(copyto="__text__", description="ID", ai=False) - pattern = odm.Keyword(copyto="__text__", description="Pattern Name") - categories = odm.List(odm.Keyword(), description="Categories") + attack_id = odm.Keyword(copyto="__text__", description="Unique identifier corresponding to a specific tactic or technique in the MITRE ATT&CK framework.", ai=False) + pattern = odm.Keyword(copyto="__text__", description="The name of the MITRE ATT&CK pattern that is associated with the detected malware or malicious activity.") + categories = odm.List(odm.Keyword(), description="A list of categories that describe and classify the nature of the cyber attack based on the MITRE ATT&CK framework.") -@odm.model(index=True, store=False, description="Heuristic Signatures") +@odm.model(index=True, store=False, description="Describes a signature that has matched, indicating potential malicious behavior.") class Signature(odm.Model): - name = odm.Keyword(copyto="__text__", description="Name of the signature that triggered the heuristic") - frequency = odm.Integer(default=1, description="Number of times this signature triggered the heuristic") - safe = odm.Boolean(default=False, description="Is the signature safelisted or not") + name = odm.Keyword(copyto="__text__", description="Name of the detection signature that triggered the heuristic.") + frequency = odm.Integer(default=1, description="The count of how many times this particular signature has triggered the heuristic during analysis.") + safe = 
odm.Boolean(default=False, description="A boolean indicating whether the signature is considered safe and has been safelisted, thus not contributing to the score.") @odm.model(index=True, store=False, description="Heuristic associated to the Section") class Heuristic(odm.Model): - heur_id = odm.Keyword(copyto="__text__", description="ID of the heuristic triggered", ai=False) - name = odm.Keyword(copyto="__text__", description="Name of the heuristic") - attack = odm.List(odm.Compound(Attack), default=[], description="List of Att&ck IDs related to this heuristic") + heur_id = odm.Keyword(copyto="__text__", description="Unique identifier of the heuristic that was triggered, contributing to the overall assessment of potential maliciousness.", ai=False) + name = odm.Keyword(copyto="__text__", description="The name of the heuristic rule that was triggered during the analysis.") + attack = odm.List(odm.Compound(Attack), default=[], description="A list of MITRE ATT&CK identifiers that are associated with this heuristic, linking detected behavior to known techniques.") signature = odm.List(odm.Compound(Signature), default=[], - description="List of signatures that triggered the heuristic", ai=False) - score = odm.Integer(description="Calculated Heuristic score") + description="A list of signatures whose detection has contributed to the triggering of this heuristic.", ai=False) + score = odm.Integer(description="The score assigned by this heuristic, which contributes to the overall threat assessment of the analyzed artifact.") @odm.model(index=True, store=False, description="Result Section") class Section(odm.Model): - auto_collapse = odm.Boolean(default=False, description="Should the section be collapsed when displayed?", ai=False) - body = odm.Optional(odm.Text(copyto="__text__"), description="Text body of the result section") - classification = odm.Classification(description="Classification of the section", ai=False) - body_format = odm.Enum(values=BODY_FORMAT, 
index=False, description="Type of body in this section") - body_config = odm.Optional(odm.Mapping(odm.Any(), index=False, - description="Configurations for the body of this section"), ai=False) - depth = odm.Integer(index=False, description="Depth of the section", ai=False) - heuristic = odm.Optional(odm.Compound(Heuristic), description="Heuristic used to score result section") - tags = odm.Compound(Tagging, default={}, description="List of tags associated to this section") - safelisted_tags = odm.FlattenedListObject(store=False, default={}, description="List of safelisted tags", ai=False) - title_text = odm.Text(copyto="__text__", description="Title of the section") + auto_collapse = odm.Boolean(default=False, description="Indicates whether the section should be initially displayed as collapsed in the user interface.", ai=False) + body = odm.Optional(odm.Text(copyto="__text__"), description="The main content of the result section, which may include detailed analysis findings or descriptions.") + classification = odm.Classification(description="The classification level assigned to the information within the section, dictating who can view it.", ai=False) + body_format = odm.Enum(values=BODY_FORMAT, index=False, description="The format of the body content, such as text, JSON, or image, which determines how it is displayed.") + body_config = odm.Optional(odm.Mapping(odm.Any(), index=False), + description="Additional configurations that specify how the body content should be rendered or processed.", ai=False) + depth = odm.Integer(index=False, description="The nesting level of the section within the overall result hierarchy, used for organizing complex results.", ai=False) + heuristic = odm.Optional(odm.Compound(Heuristic), description="The heuristic analysis that contributed to the scoring of this section, if applicable.") + tags = odm.Compound(Tagging, default={}, description="A collection of tags that categorize or label the section based on the analysis 
findings.") + safelisted_tags = odm.FlattenedListObject(store=False, default={}, description="Tags that have been deemed safe and are excluded from contributing to the overall threat score.", ai=False) + title_text = odm.Text(copyto="__text__", description="The title of the section, summarizing its content or purpose.") promote_to = odm.Optional(odm.Enum( - values=PROMOTE_TO, - description="This is the type of data that the current section should be promoted to.", ai=False)) + values=PROMOTE_TO, ai=False), description="The category of data that this section's content should be elevated to for reporting or further analysis.") @odm.model(index=True, store=True, description="Result Body") class ResultBody(odm.Model): - score = odm.Integer(default=0, description="Aggregate of the score for all heuristics") - sections = odm.List(odm.Compound(Section), default=[], description="List of sections") + score = odm.Integer(default=0, description="The total score calculated from all heuristics applied, indicating overall severity.") + sections = odm.List(odm.Compound(Section), default=[], description="An ordered list of Section objects that detail the analysis results.") @odm.model(index=False, store=False, description="Service Milestones") class Milestone(odm.Model): - service_started = odm.Date(default="NOW", description="Date the service started scanning") - service_completed = odm.Date(default="NOW", description="Date the service finished scanning") + service_started = odm.Date(default="NOW", description="Timestamp marking when the service began its analysis of the artifact.") + service_completed = odm.Date(default="NOW", description="Timestamp marking when the service completed its analysis, signaling the end of processing for the artifact.") @odm.model(index=True, store=False, description="File related to the Response") class File(odm.Model): - name = odm.Keyword(copyto="__text__", description="Name of the file") - sha256 = odm.SHA256(copyto="__text__", 
description="SHA256 of the file") - description = odm.Text(copyto="__text__", description="Description of the file") - classification = odm.Classification(description="Classification of the file", ai=False) + name = odm.Keyword(copyto="__text__", description="The original name of the file being analyzed or generated during the analysis process.") + sha256 = odm.SHA256(copyto="__text__", description="The SHA256 hash of the file, serving as a unique identifier for the content.") + description = odm.Text(copyto="__text__", description="A brief description of the file's purpose or contents, especially if it is an output of the analysis.") + classification = odm.Classification(description="The classification level of the file, indicating the sensitivity of its contents.", ai=False) is_section_image = odm.Boolean(default=False, - description="Is this an image used in an Image Result Section?", ai=False) + description="A flag indicating whether the file is an image that is used within an image-based result section.", ai=False) # Possible values for PARENT_RELATION can be found in # assemblyline-v4-service/assemblyline_v4_service/common/task.py. parent_relation = odm.Text( default="EXTRACTED", - description="File relation to parent, if any.\ -
Values: `\"ROOT\", \"EXTRACTED\", \"INFORMATION\", \"DYNAMIC\", \"MEMDUMP\", \"DOWNLOADED\"`", ai=False + description="Describes the relationship of this file to the parent file, such as `EXTRACTED` or `DOWNLOADED`.", ai=False ) allow_dynamic_recursion = odm.Boolean( default=False, - description="Allow file to be analysed during Dynamic Analysis" - "even if Dynamic Recursion Prevention is enabled.", ai=False) + description="Specifies whether the file can be analyzed during dynamic analysis, even with recursion prevention.", ai=False) @odm.model(index=True, store=True, description="Response Body of Result") class ResponseBody(odm.Model): - milestones = odm.Compound(Milestone, default={}, description="Milestone block", ai=False) - service_version = odm.Keyword(store=False, description="Version of the service", ai=False) - service_name = odm.Keyword(copyto="__text__", description="Name of the service that scanned the file") + milestones = odm.Compound(Milestone, default={}, description="A set of key timestamps that mark important stages in the service's processing of the file.", ai=False) + service_version = odm.Keyword(store=False, description="The version of the service that performed the analysis, important for tracking analysis provenance.", ai=False) + service_name = odm.Keyword(copyto="__text__", description="The name of the service that conducted the analysis, useful for identifying the source of the results.") service_tool_version = odm.Optional( odm.Keyword(copyto="__text__"), - description="Tool version of the service", ai=False) - supplementary = odm.List(odm.Compound(File), default=[], description="List of supplementary files", ai=False) - extracted = odm.List(odm.Compound(File), default=[], description="List of extracted files") + description="The specific version of the analytical tool used by the service, if applicable.", ai=False) + supplementary = odm.List(odm.Compound(File), default=[], description="A list of additional files generated during 
analysis that support the main findings.", ai=False) + extracted = odm.List(odm.Compound(File), default=[], description="A list of files that were extracted from the analyzed artifact during the service's processing.") service_context = odm.Optional( odm.Keyword(index=False, store=False), - description="Context about the service", ai=False) + description="Additional context or metadata about the service's execution environment or configuration.", ai=False) service_debug_info = odm.Optional( odm.Keyword(index=False, store=False), - description="Debug info about the service", ai=False) + description="Information that can be used for debugging or understanding the service's analysis process.", ai=False) @odm.model(index=True, store=True, description="Result Model") class Result(odm.Model): - archive_ts = odm.Optional(odm.Date(description="Time at which the result was archived", ai=False)) - classification = odm.Classification(description="Aggregate classification for the result", ai=False) - created = odm.Date(default="NOW", description="Date at which the result object got created", ai=False) - expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry timestamp", ai=False) - response: ResponseBody = odm.compound(ResponseBody, description="The body of the response from the service") - result: ResultBody = odm.compound(ResultBody, default={}, description="The result body") - sha256 = odm.SHA256(store=False, description="SHA256 of the file the result object relates to") - type = odm.Optional(odm.Keyword()) - size = odm.Optional(odm.Integer()) - drop_file = odm.Boolean(default=False, description="Use to not pass to other stages after this run", ai=False) - from_archive = odm.Boolean(index=False, default=False, description="Was loaded from the archive", ai=False) + archive_ts = odm.Optional(odm.Date(ai=False), description="The timestamp when the result was moved to long-term storage or archived.") + classification = odm.Classification(description="The highest 
classification level assigned to any part of the result, dictating overall access control.", ai=False) + created = odm.Date(default="NOW", description="The creation timestamp for the result record, marking when the analysis result was first generated.", ai=False) + expiry_ts = odm.Optional(odm.Date(store=False), description="The timestamp when the result is scheduled to be purged or deleted from the system.", ai=False) + response: ResponseBody = odm.compound(ResponseBody, description="The container for all the response data provided by the service after analyzing the file.") + result: ResultBody = odm.compound(ResultBody, default={}, description="The container for the detailed results of the analysis, including sections and scores.") + sha256 = odm.SHA256(store=False, description="The SHA256 hash of the file that was analyzed, linking the result to the specific artifact.") + type = odm.Optional(odm.Keyword(), description="The MIME type or other file classification identified by Assemblyline that is linked to the result, providing insight into the file's content or format.") + size = odm.Optional(odm.Integer(), description="The size (in bytes) of the analyzed file pertinent to the result.") + drop_file = odm.Boolean(default=False, description="A flag indicating whether the file should be excluded from subsequent analysis stages.", ai=False) + from_archive = odm.Boolean(index=False, default=False, description="Indicates whether the result was retrieved from an archive rather than produced from a recent analysis.", ai=False) def build_key(self, service_tool_version=None, task=None): return self.help_build_key( diff --git a/assemblyline/odm/models/signature.py b/assemblyline/odm/models/signature.py index 341ffed25..a7a70b374 100644 --- a/assemblyline/odm/models/signature.py +++ b/assemblyline/odm/models/signature.py @@ -11,18 +11,23 @@ RULE_STATUSES = DEPLOYED_STATUSES + DRAFT_STATUSES + STALE_STATUSES -@odm.model(index=True, store=True) +@odm.model(index=True, 
store=True, description="""The Signature model within Assemblyline serves as a central framework for defining and managing security signatures, crucial components for the detection and analysis of malware. It provides a structured format that encompasses essential attributes such as a signature's name, classification level, source, and unique identifier. Moreover, it incorporates revision history, last modification timestamps, and statistical data to gauge performance and utility. + +The model presents users with a mix of static and dynamic information, ranging from immutable identification details to variable metadata reflecting the signature's current operational status and history of changes. This latter aspect is captured through fields that log the date and user associated with the last status update, offering a view into the signature's lifecycle. + +Understanding the Signature model is vital for cybersecurity professionals who are tasked with crafting precise Lucene-based search queries in Assemblyline. Mastery of this model's components will enable users to efficiently search, filter, and analyze signatures based on various parameters, thereby facilitating effective management and deployment in a cybersecurity context. 
+""") class Signature(odm.Model): - classification = odm.Classification(store=True, default=Classification.UNRESTRICTED) - data = odm.Text(index=False, store=False) - last_modified = odm.Date(default="NOW") - name = odm.Keyword(copyto="__text__") - order = odm.Integer(default=1, store=False) - revision = odm.Keyword(default="1") - signature_id = odm.Optional(odm.Keyword()) - source = odm.Keyword() - state_change_date = odm.Optional(odm.Date(store=False)) - state_change_user = odm.Optional(odm.Keyword(store=False)) - stats = odm.Compound(Statistics, default={}) - status = odm.Enum(values=RULE_STATUSES, copyto="__text__") - type = odm.Keyword(copyto="__text__") + classification = odm.Classification(store=True, default=Classification.UNRESTRICTED, description="Indicates the sensitivity level of the signature, which dictates who can access it based on their clearance.") + data = odm.Text(index=False, store=False, description="Stores the actual signature data or pattern used for malware detection.") + last_modified = odm.Date(default="NOW", description="Records the timestamp of the most recent update to the signature. Defaults to the current time when the signature is modified.") + name = odm.Keyword(copyto="__text__", description="A unique and descriptive name for the signature.") + order = odm.Integer(default=1, store=False, description="Determines the processing order of the signature relative to others.")
+ revision = odm.Keyword(default="1", description="Tracks the version of the signature, with the default starting value set to \"1\".") + signature_id = odm.Optional(odm.Keyword(), description="A unique identifier for the signature, which can be used for tracking and referencing purposes.") + source = odm.Keyword(description="Identifies the origin or the entity that provided the signature.") + state_change_date = odm.Optional(odm.Date(store=False), description="Captures the date when the signature's status was last updated.") + state_change_user = odm.Optional(odm.Keyword(store=False), description="Records the username of the individual who last modified the signature's status.") + stats = odm.Compound(Statistics, default={}, description="Holds various statistical data related to the signature's performance and usage.") + status = odm.Enum(values=RULE_STATUSES, copyto="__text__", description="Reflects the operational state of the signature, indicating whether it is deployed, in testing, or otherwise.") + type = odm.Keyword(copyto="__text__", description="Specifies the category or classification of the signature, which can be used for organizing and filtering signatures.") diff --git a/assemblyline/odm/models/statistics.py b/assemblyline/odm/models/statistics.py index 4fce5c2fd..253a3a43d 100644 --- a/assemblyline/odm/models/statistics.py +++ b/assemblyline/odm/models/statistics.py @@ -1,12 +1,17 @@ from assemblyline import odm -@odm.model(index=True, store=True, description="Statistics Model") +@odm.model(index=True, store=True, description="""The Statistics model within Assemblyline is a framework that gathers and organizes quantitative data from malware analysis. It offers users essential statistical information such as counts, minimum and maximum values, averages, and sums. These data points help to quantify different attributes of the analyzed events or items.
+ +In addition, the model includes fields that track the timing of events, such as when a particular signature was first seen or most recently seen. This information can be beneficial for tracking trends and patterns over time. + +By utilizing the Statistics model, users are equipped with the necessary data to perform searches and analyze malware using Lucene queries. This can aid in the efficient identification and examination of cyber threats. +""") class Statistics(odm.Model): - count = odm.Integer(default=0, description="Count of statistical hits") - min = odm.Integer(default=0, description="Minimum value of all stastical hits") - max = odm.Integer(default=0, description="Maximum value of all stastical hits") - avg = odm.Integer(default=0, description="Average of all stastical hits") - sum = odm.Integer(default=0, description="Sum of all stastical hits") - first_hit = odm.Optional(odm.Date(), description="Date of first hit of statistic") - last_hit = odm.Optional(odm.Date(), description="Date of last hit of statistic") + count = odm.Integer(default=0, description="Total number of times a particular data point or event has been recorded.") + min = odm.Integer(default=0, description="The smallest numerical value recorded among all statistical events.") + max = odm.Integer(default=0, description="The largest numerical value recorded among all statistical events.") + avg = odm.Integer(default=0, description="The mean value derived from the sum of all recorded statistical events divided by the count.") + sum = odm.Integer(default=0, description="The aggregate total of all numerical values from the statistical events.") + first_hit = odm.Optional(odm.Date(), description="The date when the first recorded instance of the statistical event occurred.") + last_hit = odm.Optional(odm.Date(), description="The date when the most recent instance of the statistical event was recorded.") diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py 
index 7acf03a89..1f43c8bf4 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -1,7 +1,10 @@ from __future__ import annotations + import hashlib + from assemblyline import odm from assemblyline.common import forge + Classification = forge.get_classification() SUBMISSION_STATES = ['failed', 'submitted', 'completed'] @@ -9,24 +12,25 @@ DEFAULT_RESUBMIT = [] -@odm.model(index=True, store=False, description="File Model of Submission") +@odm.model(index=True, store=False, description="""Contains details about the individual files included in the submission, including their names, sizes, and SHA256 hashes. +""") class File(odm.Model): - name = odm.Keyword(copyto="__text__", description="Name of the file") - size = odm.Optional(odm.Integer(), description="Size of the file in bytes") - sha256 = odm.SHA256(copyto="__text__", description="SHA256 hash of the file") + name = odm.Keyword(copyto="__text__", description="The original name of the file as it was submitted.") + size = odm.Optional(odm.Integer(), description="The file's size in bytes.") + sha256 = odm.SHA256(copyto="__text__", description="The SHA256 cryptographic hash of the file, serving as a unique identifier for the file's content.") -@odm.model(index=False, store=False, description="Service Selection Scheme") +@odm.model(index=False, store=False, description="""Outlines the services selected for analysis, any excluded services, and any additional services that should be used in the case of rescan or resubmit actions. 
+""") class ServiceSelection(odm.Model): - selected = odm.List(odm.Keyword(), default=DEFAULT_SRV_SEL, description="List of selected services") - excluded = odm.List(odm.Keyword(), default=[], description="List of excluded services") + selected = odm.List(odm.Keyword(), default=DEFAULT_SRV_SEL, description="Services and/or service groups selected to process the submission, which determine the scope of analysis.") + excluded = odm.List(odm.Keyword(), default=[], description="Services and/or service groups explicitly excluded from processing the submission, bypassing their analysis.") rescan = odm.List( odm.Keyword(), default=[], - description="List of services to rescan when initial run scores as malicious") + description="Services and/or service groups to be used for a rescan if the submission's initial results are deemed malicious.") resubmit = odm.List(odm.Keyword(), default=DEFAULT_RESUBMIT, - description="Add to service selection when resubmitting") - runtime_excluded = odm.List(odm.Keyword(), default=[], description="List of runtime excluded services") + description="Additional services that are added to the selection when a submission is resubmitted.") # Fields in the parameters used to calculate hashes used for result caching @@ -44,46 +48,47 @@ class ServiceSelection(odm.Model): } -@odm.model(index=True, store=False, description="Submission Parameters") +@odm.model(index=True, store=False, description="""Specifies the parameters set at the time of submission, such as whether to perform a deep scan, if the submission should generate an alert, and any specific service selections or settings. 
+""") class SubmissionParams(odm.Model): classification = odm.Classification(default=Classification.UNRESTRICTED, - description="Original classification of the submission") - deep_scan = odm.Boolean(default=False, description="Should a deep scan be performed?") - description = odm.Text(store=True, copyto="__text__", description="Description of the submission") - generate_alert = odm.Boolean(default=False, description="Should this submission generate an alert?") - groups = odm.List(odm.Keyword(), default=[], description="List of groups related to this scan") - ignore_cache = odm.Boolean(default=False, description="Ignore the cached service results?") + description="The classification level assigned to the submission, signifying its sensitivity and dictating access controls.") + deep_scan = odm.Boolean(default=False, description="Enables a comprehensive examination of the submission by lifting standard safeguards and constraints, utilizing experimental methods and allowing for the exploration of potentially ambiguous findings to maximize the extraction of information.") + description = odm.Text(store=True, copyto="__text__", description="A narrative that outlines the content and purpose of the submission.") + generate_alert = odm.Boolean(default=False, description="Determines if an alert should be generated upon analysis completion.") + groups = odm.List(odm.Keyword(), default=[], description="A list of group identifiers relevant to this submission, often used for access control.") + ignore_cache = odm.Boolean(default=False, description="Bypasses any cached results for services, forcing all services to process the submission anew.") ignore_recursion_prevention = odm.Boolean( - default=False, description="Should we ignore recursion prevention?") + default=False, description="Overrides the default mechanism that prevents recursive scanning of extracted files.") # TODO: The following three lines can be removed after assemblyline upgrade to 4.6+ 
ignore_dynamic_recursion_prevention = odm.Boolean( - default=False, description="Should we ignore dynamic recursion prevention?") - - ignore_filtering = odm.Boolean(default=False, description="Should we ignore filtering services?") - ignore_size = odm.Boolean(default=False, description="Ignore the file size limits?") - never_drop = odm.Boolean(default=False, description="Exempt from being dropped by ingester?") - malicious = odm.Boolean(default=False, description="Is the file submitted already known to be malicious?") - max_extracted = odm.Integer(default=500, description="Max number of extracted files") - max_supplementary = odm.Integer(default=500, description="Max number of supplementary files") - priority = odm.Integer(default=1000, description="Priority of the scan") - profile = odm.Boolean(default=False, description="Should the submission do extra profiling?") - psid = odm.Optional(odm.UUID(), description="Parent submission ID") - quota_item = odm.Boolean(default=False, description="Does this submission count against quota?") - services = odm.Compound(ServiceSelection, default={}, description="Service selection") + default=False, description="Disables dynamic recursion prevention for this submission.") + + ignore_filtering = odm.Boolean(default=False, description="Indicates whether filtering services should be ignored for this submission.")
+ ignore_size = odm.Boolean(default=False, description="Allows the submission to bypass any file size restrictions set by the system.") + never_drop = odm.Boolean(default=False, description="Ensures the submission will not be dropped by the ingestion service, regardless of system load.") + malicious = odm.Boolean(default=False, description="If set to true, the system acknowledges the user's assertion that the file is malicious by automatically casting a malicious vote for the submission.") + max_extracted = odm.Integer(default=500, description="The maximum number of files that can be extracted from the submission for separate analysis.") + max_supplementary = odm.Integer(default=500, description="The maximum number of supplementary files that can be generated from the submission.") + priority = odm.Integer(default=1000, description="The processing priority of the submission.") + profile = odm.Boolean(default=False, description="Triggers additional profiling of the submission for performance and analysis metrics.") + psid = odm.Optional(odm.UUID(), description="The ID of a parent submission when the current submission is a follow-up or an extension of the parent submission.") + quota_item = odm.Boolean(default=False, description="Indicates if this submission should count against the submitter's quota.") + services = odm.Compound(ServiceSelection, default={}, description="Details which services are selected or excluded from processing this submission.") service_spec = odm.Mapping(odm.Mapping(odm.Any()), default={}, index=False, store=False, - description="Service-specific parameters") - submitter = odm.Keyword(store=True, copyto="__text__", description="User who submitted the file") - ttl = 
odm.Integer(default=0, description="Time, in days, to live for this submission") - type = odm.Keyword(default="USER", description="Type of submission") - initial_data = odm.Optional(odm.Text(index=False), description="Initialization for temporary submission data") + description="A dictionary specifying service-specific parameters that may alter service behavior for this submission.") + submitter = odm.Keyword(store=True, copyto="__text__", description="Name of the account that submitted the file for analysis.") + ttl = odm.Integer(default=0, description="The time-to-live for the submission, defining how many days it will be retained before expiry.") + type = odm.Keyword(default="USER", description="The type of submission (e.g., `USER` for user-submitted), indicating its origin or purpose.") + initial_data = odm.Optional(odm.Text(index=False), description="Content provided at the time of submission to initialize the temporary submission data, which may be used during analysis.") auto_archive = odm.Boolean(default=False, - description="Does the submission automatically goes into the archive when completed?") + description="Determines whether the submission should automatically be archived upon completion.") delete_after_archive = odm.Boolean( default=False, - description="When the submission is archived, should we delete it from hot storage right away?") + description="Specifies if the submission data should be deleted from active storage immediately after archiving.") use_archive_alternate_dtl = odm.Boolean(default=False, - description="Should we use the alternate dtl while archiving?") + description="Indicates if an alternate data lifetime should be applied to the submission once archived.") def get_hashing_keys(self): """Get the sections of the submission parameters that should be used in result hashes.""" @@ -114,46 +119,49 @@ def create_filescore_key(self, sha256, services: list = None): return 'v'.join([str(hashlib.md5(s.encode()).hexdigest()), str(version)]) 
-@odm.model(index=True, store=True, description="Submission-Relevant Times") +@odm.model(index=True, store=True, description="""Captures important timestamps related to the submission's lifecycle, such as when it was submitted and when the analysis was completed. +""") class Times(odm.Model): - completed = odm.Optional(odm.Date(store=False), description="Date at which the submission finished scanning") - submitted = odm.Date(default="NOW", description="Date at which the submission started scanning") + completed = odm.Optional(odm.Date(store=False), description="Timestamp recording when the analysis of the submission was completed.") + submitted = odm.Date(default="NOW", description="Timestamp recording when the submission was initiated and began the analysis process.") -@odm.model(index=True, store=False, description="Submission Verdict") +@odm.model(index=True, store=False, description="""Records user feedback regarding the perceived maliciousness of a submission, serving as an aggregation of user opinions rather than the definitive verdict of the system. +""") class Verdict(odm.Model): - malicious = odm.List(odm.Keyword(), default=[], description="List of user that thinks this submission is malicious") + malicious = odm.List(odm.Keyword(), default=[], description="A list of usernames representing users who have judged the submission to be malicious.") non_malicious = odm.List( odm.Keyword(), default=[], - description="List of user that thinks this submission is non-malicious") + description="A list of usernames representing users who have judged the submission to be non-malicious.") -@odm.model(index=True, store=True, description="Model of Submission") +@odm.model(index=True, store=True, description="""A Submission in Assemblyline is a critical entity that encapsulates the analysis process and data for a file or collection of files submitted to determine the presence of malware. 
It contains a wealth of attributes such as file details, parameters for analysis, and the outcome of the scan. Familiarity with the Submission model is essential for users who need to construct precise Lucene search queries. This understanding enables them to effectively navigate and query the Assemblyline system to obtain detailed information on submissions, streamline their search efforts, and efficiently access the desired analysis results. +""") class Submission(odm.Model): - archive_ts = odm.Optional(odm.Date(description="Time at which the submission was archived", ai=False)) - archived = odm.Boolean(default=False, description="Document is present in the malware archive", ai=False) - classification = odm.Classification(description="Classification of the submission") - error_count = odm.Integer(description="Total number of errors in the submission", ai=False) - errors: list[str] = odm.List(odm.Keyword(), store=False, description="List of error keys", ai=False) - expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry timestamp", ai=False) - file_count = odm.Integer(description="Total number of files in the submission", ai=False) - files: list[File] = odm.List(odm.Compound(File), description="List of files that were originally submitted") - max_score = odm.Integer(description="Maximum score of all the files in the scan") - metadata = odm.FlattenedObject(store=False, description="Metadata associated to the submission") - params: SubmissionParams = odm.Compound(SubmissionParams, description="Submission parameter details", ai=False) - results: list[str] = odm.List(odm.Keyword(), store=False, description="List of result keys", ai=False) - sid: str = odm.UUID(copyto="__text__", description="Submission ID") - state = odm.Enum(values=SUBMISSION_STATES, description="Status of the submission", ai=False) + archive_ts = odm.Optional(odm.Date(ai=False), description="An optional timestamp indicating when the submission was archived.") + archived = 
odm.Boolean(default=False, description="A boolean indicating whether the submission, including the file(s), has been stored in the malware archive.", ai=False) + classification = odm.Classification(description="Security classification level of the submission.") + error_count = odm.Integer(description="The total number of errors encountered during the analysis process.", ai=False) + errors: list[str] = odm.List(odm.Keyword(), store=False, description="A list of error identifiers that were encountered during the analysis process.", ai=False) + expiry_ts = odm.Optional(odm.Date(store=False), description="An optional timestamp specifying when the submission will expire and be purged from the system.", ai=False) + file_count = odm.Integer(description="The total number of files included in the submission.", ai=False) + files: list[File] = odm.List(odm.Compound(File), description="An array of File objects representing the files submitted for analysis, including their names, sizes, and SHA256 hashes.") + max_score = odm.Integer(description="The highest score assigned to any file within the submission, serving as an indicator of the overall potential threat level of the submission.") + metadata = odm.FlattenedObject(store=False, description="A flattened object containing additional metadata associated with the submission.") + params: SubmissionParams = odm.Compound(SubmissionParams, description="Encapsulates the configuration settings and preferences for how the submission is to be processed.", ai=False) + results: list[str] = odm.List(odm.Keyword(), store=False, description="A list of identifiers for the results generated by the analysis services.", ai=False) + sid: str = odm.UUID(copyto="__text__", description="The unique identifier (UUID) for the submission, used to track and reference it within the system.") + state = odm.Enum(values=SUBMISSION_STATES, description="The current status of the submission.", ai=False) to_be_deleted = odm.Boolean( - default=False, 
description="This document is going to be deleted as soon as it finishes", ai=False) - times = odm.Compound(Times, default={}, description="Submission-specific times") - verdict = odm.Compound(Verdict, default={}, description="Malicious verdict details", ai=False) - from_archive = odm.Boolean(index=False, default=False, description="Was loaded from the archive", ai=False) + default=False, description="A boolean flag indicating that this submission document is scheduled to be deleted as soon as it finishes.", ai=False) + times = odm.Compound(Times, default={}, description="An object of type Times that includes timestamps for when the submission was submitted and completed.") + verdict = odm.Compound(Verdict, default={}, description="Captures user judgments on whether the submission is considered malicious or non-malicious.", ai=False) + from_archive = odm.Boolean(index=False, default=False, description="A boolean indicating if the submission was loaded from the archive.", ai=False) # the filescore key, used in deduplication. This is a non-unique key, that is # shared by submissions that may be processed as duplicates. - scan_key = odm.Optional(odm.Keyword(store=False, index=False, ai=False)) + scan_key = odm.Optional(odm.Keyword(store=False, index=False, ai=False), description="An optional non-unique identifier known as the filescore key, which is used in the deduplication process. This key is shared by submissions that may be processed as duplicates.") def is_submit(self): return self.state == 'submitted'