From e326500bd9e2fa26e89890bc446a93f269cd8138 Mon Sep 17 00:00:00 2001 From: cccs-eric Date: Fri, 20 Sep 2024 07:32:27 -0400 Subject: [PATCH 01/11] First update to submission and result --- assemblyline/odm/models/result.py | 113 ++++++++++----------- assemblyline/odm/models/submission.py | 139 ++++++++++++++------------ 2 files changed, 129 insertions(+), 123 deletions(-) diff --git a/assemblyline/odm/models/result.py b/assemblyline/odm/models/result.py index 762326c7e..422c911a4 100644 --- a/assemblyline/odm/models/result.py +++ b/assemblyline/odm/models/result.py @@ -32,112 +32,109 @@ constants = forge.get_constants() -@odm.model(index=True, store=False) +@odm.model(index=True, store=False, description="Represents cyber attack tactics or techniques as identified by the MITRE ATT&CK framework.") class Attack(odm.Model): - attack_id = odm.Keyword(copyto="__text__", description="ID", ai=False) - pattern = odm.Keyword(copyto="__text__", description="Pattern Name") - categories = odm.List(odm.Keyword(), description="Categories") + attack_id = odm.Keyword(copyto="__text__", description="Unique identifier corresponding to a specific tactic or technique in the MITRE ATT&CK framework.", ai=False) + pattern = odm.Keyword(copyto="__text__", description="The name of the MITRE ATT&CK pattern that is associated with the detected malware or malicious activity.") + categories = odm.List(odm.Keyword(), description="A list of categories that describe and classify the nature of the cyber attack based on the MITRE ATT&CK framework.") -@odm.model(index=True, store=False, description="Heuristic Signatures") +@odm.model(index=True, store=False, description="Describes a signature that has matched, indicating potential malicious behavior.") class Signature(odm.Model): - name = odm.Keyword(copyto="__text__", description="Name of the signature that triggered the heuristic") - frequency = odm.Integer(default=1, description="Number of times this signature triggered the heuristic") - safe = 
odm.Boolean(default=False, description="Is the signature safelisted or not") + name = odm.Keyword(copyto="__text__", description="Name of the detection signature that triggered the heuristic.") + frequency = odm.Integer(default=1, description="The count of how many times this particular signature has triggered the heuristic during analysis.") + safe = odm.Boolean(default=False, description="A boolean indicating whether the signature is considered safe and has been safelisted, thus not contributing to the score. **TODO**: Is the last part about not contributing to the score true?") @odm.model(index=True, store=False, description="Heuristic associated to the Section") class Heuristic(odm.Model): - heur_id = odm.Keyword(copyto="__text__", description="ID of the heuristic triggered", ai=False) - name = odm.Keyword(copyto="__text__", description="Name of the heuristic") - attack = odm.List(odm.Compound(Attack), default=[], description="List of Att&ck IDs related to this heuristic") + heur_id = odm.Keyword(copyto="__text__", description="Unique identifier of the heuristic that was triggered, contributing to the overall assessment of potential maliciousness.", ai=False) + name = odm.Keyword(copyto="__text__", description="The name of the heuristic rule that was triggered during the analysis.") + attack = odm.List(odm.Compound(Attack), default=[], description="A list of MITRE ATT&CK identifiers that are associated with this heuristic, linking detected behavior to known techniques.") signature = odm.List(odm.Compound(Signature), default=[], - description="List of signatures that triggered the heuristic", ai=False) - score = odm.Integer(description="Calculated Heuristic score") + description="A list of signatures whose detection has contributed to the triggering of this heuristic.", ai=False) + score = odm.Integer(description="The score assigned by this heuristic, which contributes to the overall threat assessment of the analyzed artifact.") @odm.model(index=True, 
store=False, description="Result Section") class Section(odm.Model): - auto_collapse = odm.Boolean(default=False, description="Should the section be collapsed when displayed?", ai=False) - body = odm.Optional(odm.Text(copyto="__text__"), description="Text body of the result section") - classification = odm.Classification(description="Classification of the section", ai=False) - body_format = odm.Enum(values=BODY_FORMAT, index=False, description="Type of body in this section") - body_config = odm.Optional(odm.Mapping(odm.Any(), index=False, - description="Configurations for the body of this section"), ai=False) - depth = odm.Integer(index=False, description="Depth of the section", ai=False) - heuristic = odm.Optional(odm.Compound(Heuristic), description="Heuristic used to score result section") - tags = odm.Compound(Tagging, default={}, description="List of tags associated to this section") - safelisted_tags = odm.FlattenedListObject(store=False, default={}, description="List of safelisted tags", ai=False) - title_text = odm.Text(copyto="__text__", description="Title of the section") + auto_collapse = odm.Boolean(default=False, description="Indicates whether the section should be initially displayed as collapsed in the user interface.", ai=False) + body = odm.Optional(odm.Text(copyto="__text__"), description="The main content of the result section, which may include detailed analysis findings or descriptions.") + classification = odm.Classification(description="The classification level assigned to the information within the section, dictating who can view it.", ai=False) + body_format = odm.Enum(values=BODY_FORMAT, index=False, description="**TODO**: **Original**:Type of body in this section **Generated**:The format of the body content, such as text, JSON, or image, which determines how it is displayed.") + body_config = odm.Optional(odm.Mapping(odm.Any(), index=False), + description="**TODO**: **Original**:Configurations for the body of this section 
**Generated**:Additional configurations that specify how the body content should be rendered or processed.", ai=False) + depth = odm.Integer(index=False, description="**TODO**: **Original**:Depth of the section **Generated**:The nesting level of the section within the overall result hierarchy, used for organizing complex results.", ai=False) + heuristic = odm.Optional(odm.Compound(Heuristic), description="The heuristic analysis that contributed to the scoring of this section, if applicable.") + tags = odm.Compound(Tagging, default={}, description="A collection of tags that categorize or label the section based on the analysis findings.") + safelisted_tags = odm.FlattenedListObject(store=False, default={}, description="Tags that have been deemed safe and are excluded from contributing to the overall threat score.", ai=False) + title_text = odm.Text(copyto="__text__", description="The title of the section, summarizing its content or purpose.") promote_to = odm.Optional(odm.Enum( - values=PROMOTE_TO, - description="This is the type of data that the current section should be promoted to.", ai=False)) + values=PROMOTE_TO, ai=False), description="**TODO**: **Original**:This is the type of data that the current section should be promoted to. 
**Generated**:The category of data that this section's content should be elevated to for reporting or further analysis.") @odm.model(index=True, store=True, description="Result Body") class ResultBody(odm.Model): - score = odm.Integer(default=0, description="Aggregate of the score for all heuristics") - sections = odm.List(odm.Compound(Section), default=[], description="List of sections") + score = odm.Integer(default=0, description="The total score calculated from all heuristics applied, indicating overall severity.") + sections = odm.List(odm.Compound(Section), default=[], description="An ordered list of Section objects that detail the analysis results.") @odm.model(index=False, store=False, description="Service Milestones") class Milestone(odm.Model): - service_started = odm.Date(default="NOW", description="Date the service started scanning") - service_completed = odm.Date(default="NOW", description="Date the service finished scanning") + service_started = odm.Date(default="NOW", description="Timestamp marking when the service began its analysis of the artifact.") + service_completed = odm.Date(default="NOW", description="Timestamp marking when the service completed its analysis, signaling the end of processing for the artifact.") @odm.model(index=True, store=False, description="File related to the Response") class File(odm.Model): - name = odm.Keyword(copyto="__text__", description="Name of the file") - sha256 = odm.SHA256(copyto="__text__", description="SHA256 of the file") - description = odm.Text(copyto="__text__", description="Description of the file") - classification = odm.Classification(description="Classification of the file", ai=False) + name = odm.Keyword(copyto="__text__", description="The original name of the file being analyzed or generated during the analysis process.") + sha256 = odm.SHA256(copyto="__text__", description="The SHA256 hash of the file, serving as a unique identifier for the content.") + description = odm.Text(copyto="__text__", 
description="A brief description of the file's purpose or contents, especially if it is an output of the analysis.") + classification = odm.Classification(description="The classification level of the file, indicating the sensitivity of its contents.", ai=False) is_section_image = odm.Boolean(default=False, - description="Is this an image used in an Image Result Section?", ai=False) + description="A flag indicating whether the file is an image that is used within an image-based result section.", ai=False) # Possible values for PARENT_RELATION can be found in # assemblyline-v4-service/assemblyline_v4_service/common/task.py. parent_relation = odm.Text( default="EXTRACTED", - description="File relation to parent, if any.\ -
Values: `\"ROOT\", \"EXTRACTED\", \"INFORMATION\", \"DYNAMIC\", \"MEMDUMP\", \"DOWNLOADED\"`", ai=False + description="Describes the relationship of this file to the parent file, such as `EXTRACTED` or `DOWNLOADED`.", ai=False ) allow_dynamic_recursion = odm.Boolean( default=False, - description="Allow file to be analysed during Dynamic Analysis" - "even if Dynamic Recursion Prevention is enabled.", ai=False) + description="Specifies whether the file can be analyzed during dynamic analysis, even with recursion prevention.", ai=False) @odm.model(index=True, store=True, description="Response Body of Result") class ResponseBody(odm.Model): - milestones = odm.Compound(Milestone, default={}, description="Milestone block", ai=False) - service_version = odm.Keyword(store=False, description="Version of the service", ai=False) - service_name = odm.Keyword(copyto="__text__", description="Name of the service that scanned the file") + milestones = odm.Compound(Milestone, default={}, description="A set of key timestamps that mark important stages in the service's processing of the file.", ai=False) + service_version = odm.Keyword(store=False, description="The version of the service that performed the analysis, important for tracking analysis provenance.", ai=False) + service_name = odm.Keyword(copyto="__text__", description="The name of the service that conducted the analysis, useful for identifying the source of the results.") service_tool_version = odm.Optional( odm.Keyword(copyto="__text__"), - description="Tool version of the service", ai=False) - supplementary = odm.List(odm.Compound(File), default=[], description="List of supplementary files", ai=False) - extracted = odm.List(odm.Compound(File), default=[], description="List of extracted files") + description="The specific version of the analytical tool used by the service, if applicable.", ai=False) + supplementary = odm.List(odm.Compound(File), default=[], description="A list of additional files generated during 
analysis that support the main findings.", ai=False) + extracted = odm.List(odm.Compound(File), default=[], description="A list of files that were extracted from the analyzed artifact during the service's processing.") service_context = odm.Optional( odm.Keyword(index=False, store=False), - description="Context about the service", ai=False) + description="Additional context or metadata about the service's execution environment or configuration.", ai=False) service_debug_info = odm.Optional( odm.Keyword(index=False, store=False), - description="Debug info about the service", ai=False) + description="Information that can be used for debugging or understanding the service's analysis process.", ai=False) @odm.model(index=True, store=True, description="Result Model") class Result(odm.Model): - archive_ts = odm.Optional(odm.Date(description="Time at which the result was archived", ai=False)) - classification = odm.Classification(description="Aggregate classification for the result", ai=False) - created = odm.Date(default="NOW", description="Date at which the result object got created", ai=False) - expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry timestamp", ai=False) - response: ResponseBody = odm.compound(ResponseBody, description="The body of the response from the service") - result: ResultBody = odm.compound(ResultBody, default={}, description="The result body") - sha256 = odm.SHA256(store=False, description="SHA256 of the file the result object relates to") - type = odm.Optional(odm.Keyword()) - size = odm.Optional(odm.Integer()) - drop_file = odm.Boolean(default=False, description="Use to not pass to other stages after this run", ai=False) - from_archive = odm.Boolean(index=False, default=False, description="Was loaded from the archive", ai=False) + archive_ts = odm.Optional(odm.Date(ai=False), description="The timestamp when the result was moved to long-term storage or archived.") + classification = odm.Classification(description="The highest 
classification level assigned to any part of the result, dictating overall access control.", ai=False) + created = odm.Date(default="NOW", description="The creation timestamp for the result record, marking when the analysis result was first generated.", ai=False) + expiry_ts = odm.Optional(odm.Date(store=False), description="The timestamp when the result is scheduled to be purged or deleted from the system.", ai=False) + response: ResponseBody = odm.compound(ResponseBody, description="The container for all the response data provided by the service after analyzing the file.") + result: ResultBody = odm.compound(ResultBody, default={}, description="The container for the detailed results of the analysis, including sections and scores.") + sha256 = odm.SHA256(store=False, description="The SHA256 hash of the file that was analyzed, linking the result to the specific artifact.") + type = odm.Optional(odm.Keyword(), description="**TODO**") + size = odm.Optional(odm.Integer(), description="**TODO**") + drop_file = odm.Boolean(default=False, description="A flag indicating whether the file should be excluded from subsequent analysis stages.", ai=False) + from_archive = odm.Boolean(index=False, default=False, description="**TODO**: **Original**:Was loaded from the archive **Generated**:Indicates whether the result was retrieved from an archive rather than produced from a recent analysis.", ai=False) def build_key(self, service_tool_version=None, task=None): return self.help_build_key( diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index 7acf03a89..dc8f6c88b 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -1,7 +1,10 @@ from __future__ import annotations + import hashlib + from assemblyline import odm from assemblyline.common import forge + Classification = forge.get_classification() SUBMISSION_STATES = ['failed', 'submitted', 'completed'] @@ -9,24 +12,26 @@ DEFAULT_RESUBMIT = [] 
-@odm.model(index=True, store=False, description="File Model of Submission") +@odm.model(index=True, store=False, description="""Contains details about the individual files included in the submission, including their names, sizes, and SHA256 hashes. +""") class File(odm.Model): - name = odm.Keyword(copyto="__text__", description="Name of the file") - size = odm.Optional(odm.Integer(), description="Size of the file in bytes") - sha256 = odm.SHA256(copyto="__text__", description="SHA256 hash of the file") + name = odm.Keyword(copyto="__text__", description="The original name of the file as it was submitted.") + size = odm.Optional(odm.Integer(), description="The file's size in bytes.") + sha256 = odm.SHA256(copyto="__text__", description="The SHA256 cryptographic hash of the file, serving as a unique identifier for the file's content.") -@odm.model(index=False, store=False, description="Service Selection Scheme") +@odm.model(index=False, store=False, description="""Outlines the services selected for analysis, any excluded services, and any additional services that should be used in the case of rescan or resubmit actions. 
+""") class ServiceSelection(odm.Model): - selected = odm.List(odm.Keyword(), default=DEFAULT_SRV_SEL, description="List of selected services") - excluded = odm.List(odm.Keyword(), default=[], description="List of excluded services") + selected = odm.List(odm.Keyword(), default=DEFAULT_SRV_SEL, description="Services selected to process the submission, which determine the scope of analysis.") + excluded = odm.List(odm.Keyword(), default=[], description="Services explicitly excluded from processing the submission, bypassing their analysis.") rescan = odm.List( odm.Keyword(), default=[], - description="List of services to rescan when initial run scores as malicious") + description="Services to be used for a rescan if the submission's initial results are deemed malicious.") resubmit = odm.List(odm.Keyword(), default=DEFAULT_RESUBMIT, - description="Add to service selection when resubmitting") - runtime_excluded = odm.List(odm.Keyword(), default=[], description="List of runtime excluded services") + description="Additional services that are added to the selection when a submission is resubmitted.") + runtime_excluded = odm.List(odm.Keyword(), default=[], description="**TODO**: **Original**:List of runtime excluded services **Generated**:Services that are excluded during the submission's runtime, possibly due to dynamic decision-making.") # Fields in the parameters used to calculate hashes used for result caching @@ -44,46 +49,47 @@ class ServiceSelection(odm.Model): } -@odm.model(index=True, store=False, description="Submission Parameters") +@odm.model(index=True, store=False, description="""Specifies the parameters set at the time of submission, such as whether to perform a deep scan, if the submission should generate an alert, and any specific service selections or settings. 
+""") class SubmissionParams(odm.Model): classification = odm.Classification(default=Classification.UNRESTRICTED, - description="Original classification of the submission") - deep_scan = odm.Boolean(default=False, description="Should a deep scan be performed?") - description = odm.Text(store=True, copyto="__text__", description="Description of the submission") - generate_alert = odm.Boolean(default=False, description="Should this submission generate an alert?") - groups = odm.List(odm.Keyword(), default=[], description="List of groups related to this scan") - ignore_cache = odm.Boolean(default=False, description="Ignore the cached service results?") + description="The initial security classification for the submission, indicating its confidentiality.") + deep_scan = odm.Boolean(default=False, description="**TODO**: **Original**:Should a deep scan be performed? **Generated**:Specifies whether a more intensive scan should be conducted on the submission.") + description = odm.Text(store=True, copyto="__text__", description="A user-provided description or notes regarding the submission.") + generate_alert = odm.Boolean(default=False, description="**TODO**: **Original**:Should this submission generate an alert? 
**Generated**:Determines if an alert should be generated upon analysis completion.") + groups = odm.List(odm.Keyword(), default=[], description="**TODO**: **Original**:List of groups related to this scan **Generated**:A list of group identifiers relevant to this submission, often used for access control.") + ignore_cache = odm.Boolean(default=False, description="Bypasses any cached results for services, forcing all services to process the submission anew.") ignore_recursion_prevention = odm.Boolean( - default=False, description="Should we ignore recursion prevention?") + default=False, description="Overrides the default mechanism that prevents recursive scanning of extracted files.") # TODO: The following three lines can be removed after assemblyline upgrade to 4.6+ ignore_dynamic_recursion_prevention = odm.Boolean( - default=False, description="Should we ignore dynamic recursion prevention?") - - ignore_filtering = odm.Boolean(default=False, description="Should we ignore filtering services?") - ignore_size = odm.Boolean(default=False, description="Ignore the file size limits?") - never_drop = odm.Boolean(default=False, description="Exempt from being dropped by ingester?") - malicious = odm.Boolean(default=False, description="Is the file submitted already known to be malicious?") - max_extracted = odm.Integer(default=500, description="Max number of extracted files") - max_supplementary = odm.Integer(default=500, description="Max number of supplementary files") - priority = odm.Integer(default=1000, description="Priority of the scan") - profile = odm.Boolean(default=False, description="Should the submission do extra profiling?") - psid = odm.Optional(odm.UUID(), description="Parent submission ID") - quota_item = odm.Boolean(default=False, description="Does this submission count against quota?") - services = odm.Compound(ServiceSelection, default={}, description="Service selection") + default=False, description="Disables dynamic recursion prevention for this 
submission.") + + ignore_filtering = odm.Boolean(default=False, description="**TODO**: **Original**:Should we ignore filtering services? **Generated**:Indicates if filtering services should be skipped, allowing all files to be processed.") + ignore_size = odm.Boolean(default=False, description="Allows the submission to bypass any file size restrictions set by the system.") + never_drop = odm.Boolean(default=False, description="**TODO**: **Original**:Exempt from being dropped by ingester? **Generated**:Ensures the submission will not be dropped by the ingestion service, regardless of system load.") + malicious = odm.Boolean(default=False, description="**TODO**: **Original**:Is the file submitted already known to be malicious? **Generated**:Flags the submission as known to be malicious, possibly altering its handling.") + max_extracted = odm.Integer(default=500, description="The maximum number of files that can be extracted from the submission for separate analysis.") + max_supplementary = odm.Integer(default=500, description="**TODO**: **Original**:Max number of supplementary files **Generated**:The maximum number of supplementary files that can be generated from the submission.") + priority = odm.Integer(default=1000, description="**TODO**: **Original**:Priority of the scan **Generated**:The processing priority of the submission, with lower numbers indicating higher priority.") + profile = odm.Boolean(default=False, description="**TODO**: **Original**:Should the submission do extra profiling? 
**Generated**:Triggers additional profiling of the submission for performance and analysis metrics.") + psid = odm.Optional(odm.UUID(), description="The ID of a parent submission, if this submission is related to or derived from another.") + quota_item = odm.Boolean(default=False, description="Indicates if this submission should count against the submitter's quota.") + services = odm.Compound(ServiceSelection, default={}, description="Details which services are selected or excluded from processing this submission.") service_spec = odm.Mapping(odm.Mapping(odm.Any()), default={}, index=False, store=False, - description="Service-specific parameters") - submitter = odm.Keyword(store=True, copyto="__text__", description="User who submitted the file") - ttl = odm.Integer(default=0, description="Time, in days, to live for this submission") - type = odm.Keyword(default="USER", description="Type of submission") - initial_data = odm.Optional(odm.Text(index=False), description="Initialization for temporary submission data") + description="A dictionary specifying service-specific parameters that may alter service behavior for this submission.") + submitter = odm.Keyword(store=True, copyto="__text__", description="The username of the individual who submitted the file for analysis.") + ttl = odm.Integer(default=0, description="**TODO**: **Original**:Time, in days, to live for this submission **Generated**:The time-to-live for the submission, defining how many days it will be retained before expiry.") + type = odm.Keyword(default="USER", description="The type of submission (e.g., `USER` for user-submitted), indicating its origin or purpose.") + initial_data = odm.Optional(odm.Text(index=False), description="Initial temporary data provided at the time of submission, which may be used during analysis.") auto_archive = odm.Boolean(default=False, - description="Does the submission automatically goes into the archive when completed?") + description="Determines whether the submission 
should automatically be archived upon completion.") delete_after_archive = odm.Boolean( default=False, - description="When the submission is archived, should we delete it from hot storage right away?") + description="Specifies if the submission data should be deleted from active storage immediately after archiving.") use_archive_alternate_dtl = odm.Boolean(default=False, - description="Should we use the alternate dtl while archiving?") + description="**TODO**: **Original**:Should we use the alternate dtl while archiving? **Generated**:Indicates if an alternate data lifetime should be applied to the submission once archived.") def get_hashing_keys(self): """Get the sections of the submission parameters that should be used in result hashes.""" @@ -114,46 +120,49 @@ def create_filescore_key(self, sha256, services: list = None): return 'v'.join([str(hashlib.md5(s.encode()).hexdigest()), str(version)]) -@odm.model(index=True, store=True, description="Submission-Relevant Times") +@odm.model(index=True, store=True, description="""Captures important timestamps related to the submission's lifecycle, such as when it was submitted and when the analysis was completed. +""") class Times(odm.Model): - completed = odm.Optional(odm.Date(store=False), description="Date at which the submission finished scanning") - submitted = odm.Date(default="NOW", description="Date at which the submission started scanning") + completed = odm.Optional(odm.Date(store=False), description="Timestamp recording when the analysis of the submission was completed.") + submitted = odm.Date(default="NOW", description="Timestamp marking when the submission was initiated and began the analysis process.") -@odm.model(index=True, store=False, description="Submission Verdict") +@odm.model(index=True, store=False, description="""Reflects the consensus on whether the submission is deemed malicious or non-malicious based on user input. 
+""") class Verdict(odm.Model): - malicious = odm.List(odm.Keyword(), default=[], description="List of user that thinks this submission is malicious") + malicious = odm.List(odm.Keyword(), default=[], description="A list of usernames representing users who have judged the submission to be malicious.") non_malicious = odm.List( odm.Keyword(), default=[], - description="List of user that thinks this submission is non-malicious") + description="A list of usernames representing users who have judged the submission to be non-malicious.") -@odm.model(index=True, store=True, description="Model of Submission") +@odm.model(index=True, store=True, description="""A Submission in Assemblyline represents the process and associated data for analyzing a file or set of files to determine if they contain malware. When a user submits a file for analysis, this submission is encapsulated within a Submission object, which includes various attributes detailing the file data, analysis parameters, and the results of the scan. 
+""") class Submission(odm.Model): - archive_ts = odm.Optional(odm.Date(description="Time at which the submission was archived", ai=False)) - archived = odm.Boolean(default=False, description="Document is present in the malware archive", ai=False) - classification = odm.Classification(description="Classification of the submission") - error_count = odm.Integer(description="Total number of errors in the submission", ai=False) - errors: list[str] = odm.List(odm.Keyword(), store=False, description="List of error keys", ai=False) - expiry_ts = odm.Optional(odm.Date(store=False), description="Expiry timestamp", ai=False) - file_count = odm.Integer(description="Total number of files in the submission", ai=False) - files: list[File] = odm.List(odm.Compound(File), description="List of files that were originally submitted") - max_score = odm.Integer(description="Maximum score of all the files in the scan") - metadata = odm.FlattenedObject(store=False, description="Metadata associated to the submission") - params: SubmissionParams = odm.Compound(SubmissionParams, description="Submission parameter details", ai=False) - results: list[str] = odm.List(odm.Keyword(), store=False, description="List of result keys", ai=False) - sid: str = odm.UUID(copyto="__text__", description="Submission ID") - state = odm.Enum(values=SUBMISSION_STATES, description="Status of the submission", ai=False) + archive_ts = odm.Optional(odm.Date(ai=False), description="An optional timestamp indicating when the submission was archived.") + archived = odm.Boolean(default=False, description="A boolean indicating whether the submission, including the file(s), has been stored in the malware archive.", ai=False) + classification = odm.Classification(description="Security classification level of the submission.") + error_count = odm.Integer(description="The total number of errors encountered during the analysis process.", ai=False) + errors: list[str] = odm.List(odm.Keyword(), store=False, description="A list 
of error identifiers that were triggered throughout the processing of the submission.", ai=False) + expiry_ts = odm.Optional(odm.Date(store=False), description="An optional timestamp specifying when the submission will expire and be purged from the system.", ai=False) + file_count = odm.Integer(description="The total number of files included in the submission.", ai=False) + files: list[File] = odm.List(odm.Compound(File), description="An array of File objects representing the files submitted for analysis, including their names, sizes, and SHA256 hashes.") + max_score = odm.Integer(description="The highest score assigned to any file within the submission, used to indicate the potential threat level. The score is defined as follows: -1000: safe, [0, 299]: informative, [300, 699]: suspicious, [700, 999]: highly suspicious, [1000, +inf): malicious.") + metadata = odm.FlattenedObject(store=False, description="A flattened object containing additional metadata associated with the submission.") + params: SubmissionParams = odm.Compound(SubmissionParams, description="Encapsulates the configuration settings and preferences for how the submission is to be processed.", ai=False) + results: list[str] = odm.List(odm.Keyword(), store=False, description="A list of identifiers for the results generated by the analysis services.", ai=False) + sid: str = odm.UUID(copyto="__text__", description="The unique identifier (UUID) for the submission, used to track and reference it within the system.") + state = odm.Enum(values=SUBMISSION_STATES, description="The current status of the submission.", ai=False) to_be_deleted = odm.Boolean( - default=False, description="This document is going to be deleted as soon as it finishes", ai=False) - times = odm.Compound(Times, default={}, description="Submission-specific times") - verdict = odm.Compound(Verdict, default={}, description="Malicious verdict details", ai=False) - from_archive = odm.Boolean(index=False, default=False, description="Was 
loaded from the archive", ai=False) + default=False, description="A boolean flag indicating whether the file(s) associated with the submission are scheduled to be deleted from the system once the analysis is complete.", ai=False) + times = odm.Compound(Times, default={}, description="An object of type Times that includes timestamps for when the submission was submitted and completed.") + verdict = odm.Compound(Verdict, default={}, description="Captures user judgments on whether the submission is considered malicious or non-malicious.", ai=False) + from_archive = odm.Boolean(index=False, default=False, description="A boolean indicating if the submission was loaded from the archive for reanalysis.", ai=False) # the filescore key, used in deduplication. This is a non-unique key, that is # shared by submissions that may be processed as duplicates. - scan_key = odm.Optional(odm.Keyword(store=False, index=False, ai=False)) + scan_key = odm.Optional(odm.Keyword(store=False, index=False, ai=False), description="An optional non-unique identifier known as the filescore key, which is used in the deduplication process. 
This key may be shared among multiple submissions that contain similar or identical files, thereby allowing Assemblyline to recognize and process them as potential duplicates efficiently.") def is_submit(self): return self.state == 'submitted' From 671fa8613e9e56f38601499f70bdd9c8effab578 Mon Sep 17 00:00:00 2001 From: cccs-eric Date: Tue, 24 Sep 2024 10:03:28 -0400 Subject: [PATCH 02/11] Update to Submission description --- assemblyline/odm/models/submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index dc8f6c88b..247f39043 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -137,7 +137,7 @@ class Verdict(odm.Model): description="A list of usernames representing users who have judged the submission to be non-malicious.") -@odm.model(index=True, store=True, description="""A Submission in Assemblyline represents the process and associated data for analyzing a file or set of files to determine if they contain malware. When a user submits a file for analysis, this submission is encapsulated within a Submission object, which includes various attributes detailing the file data, analysis parameters, and the results of the scan. +@odm.model(index=True, store=True, description="""A Submission in Assemblyline is a critical entity that encapsulates the analysis process and data for a file or collection of files submitted to determine the presence of malware. It contains a wealth of attributes such as file details, parameters for analysis, and the outcome of the scan. Familiarity with the Submission model is essential for users who need to construct precise Lucene search queries. This understanding enables them to effectively navigate and query the Assemblyline system to obtain detailed information on submissions, streamline their search efforts, and efficiently access the desired analysis results. 
""") class Submission(odm.Model): archive_ts = odm.Optional(odm.Date(ai=False), description="An optional timestamp indicating when the submission was archived.") From fca979ea74fd8085cf6ec688b438d17ddbc3ff97 Mon Sep 17 00:00:00 2001 From: cccs-eric Date: Tue, 24 Sep 2024 13:29:55 -0400 Subject: [PATCH 03/11] Remove extra space --- assemblyline/odm/models/result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyline/odm/models/result.py b/assemblyline/odm/models/result.py index 422c911a4..cff6bb3c3 100644 --- a/assemblyline/odm/models/result.py +++ b/assemblyline/odm/models/result.py @@ -43,7 +43,7 @@ class Attack(odm.Model): class Signature(odm.Model): name = odm.Keyword(copyto="__text__", description="Name of the detection signature that triggered the heuristic.") frequency = odm.Integer(default=1, description="The count of how many times this particular signature has triggered the heuristic during analysis.") - safe = odm.Boolean(default=False, description=" A boolean indicating whether the signature is considered safe and has been safelisted, thus not contributing to the score. **TODO**:Is the last part about not contributing to the score true?") + safe = odm.Boolean(default=False, description="A boolean indicating whether the signature is considered safe and has been safelisted, thus not contributing to the score. 
**TODO**:Is the last part about not contributing to the score true?") @odm.model(index=True, store=False, description="Heuristic associated to the Section") From 2bf4381db8059face9687ac33b897dc101bb1903 Mon Sep 17 00:00:00 2001 From: cccs-eric Date: Fri, 27 Sep 2024 08:53:20 -0400 Subject: [PATCH 04/11] Update to Signature, Statistics and Submission ODM documentation --- assemblyline/odm/models/signature.py | 33 +++++++++++++++------------ assemblyline/odm/models/statistics.py | 21 ++++++++++------- assemblyline/odm/models/submission.py | 2 +- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/assemblyline/odm/models/signature.py b/assemblyline/odm/models/signature.py index 341ffed25..a7a70b374 100644 --- a/assemblyline/odm/models/signature.py +++ b/assemblyline/odm/models/signature.py @@ -11,18 +11,23 @@ RULE_STATUSES = DEPLOYED_STATUSES + DRAFT_STATUSES + STALE_STATUSES -@odm.model(index=True, store=True) +@odm.model(index=True, store=True, description="""The Signature model within Assemblyline serves as a central framework for defining and managing security signatures, crucial components for the detection and analysis of malware. It provides a structured format that encompasses essential attributes such as a signature's name, classification level, source, and unique identifier. Moreover, it incorporates revision history, last modification timestamps, and statistical data to gauge performance and utility. + +The model presents users with a mix of static and dynamic information, ranging from immutable identification details to variable metadata reflecting the signature's current operational status and history of changes. This latter aspect is captured through fields that log the date and user associated with the last status update, offering a view into the signature's lifecycle. + +Understanding the Signature model is vital for cybersecurity professionals who are tasked with crafting precise Lucene-based search queries in Assemblyline. 
Mastery of this model's components will enable users to efficiently search, filter, and analyze signatures based on various parameters, thereby facilitating effective management and deployment in a cybersecurity context. +""") class Signature(odm.Model): - classification = odm.Classification(store=True, default=Classification.UNRESTRICTED) - data = odm.Text(index=False, store=False) - last_modified = odm.Date(default="NOW") - name = odm.Keyword(copyto="__text__") - order = odm.Integer(default=1, store=False) - revision = odm.Keyword(default="1") - signature_id = odm.Optional(odm.Keyword()) - source = odm.Keyword() - state_change_date = odm.Optional(odm.Date(store=False)) - state_change_user = odm.Optional(odm.Keyword(store=False)) - stats = odm.Compound(Statistics, default={}) - status = odm.Enum(values=RULE_STATUSES, copyto="__text__") - type = odm.Keyword(copyto="__text__") + classification = odm.Classification(store=True, default=Classification.UNRESTRICTED, description="Indicates the sensitivity level of the signature, which dictates who can access it based on their clearance.") + data = odm.Text(index=False, store=False, description="Stores the actual signature data or pattern used for malware detection.") + last_modified = odm.Date(default="NOW", description="Records the timestamp of the most recent update to the signature. Defaults to the current time when the signature is modified.") + name = odm.Keyword(copyto="__text__", description="A unique and descriptive name for the signature.") + order = odm.Integer(default=1, store=False, description="**TODO**:Lower number means higher priority? **Generated**:Determines the processing order of the signature relative to others. 
A lower number indicates higher priority.") + revision = odm.Keyword(default="1", description="Tracks the version of the signature, with the default starting value set to \"1\".") + signature_id = odm.Optional(odm.Keyword(), description="A unique identifier for the signature, which can be used for tracking and referencing purposes.") + source = odm.Keyword(description="Identifies the origin or the entity that provided the signature.") + state_change_date = odm.Optional(odm.Date(store=False), description="Captures the date when the signature's status was last updated. ") + state_change_user = odm.Optional(odm.Keyword(store=False), description="Records the username of the individual who last modified the signature's status.") + stats = odm.Compound(Statistics, default={}, description="Holds various statistical data related to the signature's performance and usage") + status = odm.Enum(values=RULE_STATUSES, copyto="__text__", description="Reflects the operational state of the signature, indicating whether it is deployed, in testing, or otherwise.") + type = odm.Keyword(copyto="__text__", description="Specifies the category or classification of the signature, which can be used for organizing and filtering signatures.") diff --git a/assemblyline/odm/models/statistics.py b/assemblyline/odm/models/statistics.py index 4fce5c2fd..253a3a43d 100644 --- a/assemblyline/odm/models/statistics.py +++ b/assemblyline/odm/models/statistics.py @@ -1,12 +1,17 @@ from assemblyline import odm -@odm.model(index=True, store=True, description="Statistics Model") +@odm.model(index=True, store=True, description="""The Statistics model within Assemblyline is a framework that gathers and organizes quantitative data from malware analysis. It offers users essential statistical information such as counts, minimum and maximum values, averages, and sums. These data points help to quantify different attributes of the analyzed events or items. 
+ +In addition, the model includes fields that track the timing of events, such as when a particular signature was first seen or most recently seen. This information can be beneficial for tracking trends and patterns over time. + +By utilizing the Statistics model, users are equipped with the necessary data to perform searches and analyze malware using Lucene queries. This can aid in the efficient identification and examination of cyber threats. +""") class Statistics(odm.Model): - count = odm.Integer(default=0, description="Count of statistical hits") - min = odm.Integer(default=0, description="Minimum value of all stastical hits") - max = odm.Integer(default=0, description="Maximum value of all stastical hits") - avg = odm.Integer(default=0, description="Average of all stastical hits") - sum = odm.Integer(default=0, description="Sum of all stastical hits") - first_hit = odm.Optional(odm.Date(), description="Date of first hit of statistic") - last_hit = odm.Optional(odm.Date(), description="Date of last hit of statistic") + count = odm.Integer(default=0, description="Total number of times a particular data point or event has been recorded.") + min = odm.Integer(default=0, description="The smallest numerical value recorded among all statistical events.") + max = odm.Integer(default=0, description="The largest numerical value recorded among all statistical events.") + avg = odm.Integer(default=0, description="The mean value derived from the sum of all recorded statistical events divided by the count.") + sum = odm.Integer(default=0, description="The aggregate total of all numerical values from the statistical events.") + first_hit = odm.Optional(odm.Date(), description="The date when the first recorded instance of the statistical event occurred.") + last_hit = odm.Optional(odm.Date(), description="The date when the most recent instance of the statistical event was recorded.") diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py 
index 247f39043..4a113c1ef 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -148,7 +148,7 @@ class Submission(odm.Model): expiry_ts = odm.Optional(odm.Date(store=False), description="An optional timestamp specifying when the submission will expire and be purged from the system.", ai=False) file_count = odm.Integer(description="The total number of files included in the submission.", ai=False) files: list[File] = odm.List(odm.Compound(File), description="An array of File objects representing the files submitted for analysis, including their names, sizes, and SHA256 hashes.") - max_score = odm.Integer(description="The highest score assigned to any file within the submission, used to indicate the potential threat level. The score is defined as the following: -1000: safe, [0, 299]: informative, [300 - 699]: suspicious, [700, 999]: highly suspicious, [1000, +inf): malicious.") + max_score = odm.Integer(description="The highest score assigned to any file within the submission, serving as an indicator of the overall potential threat level of the submission.") metadata = odm.FlattenedObject(store=False, description="A flattened object containing additional metadata associated with the submission.") params: SubmissionParams = odm.Compound(SubmissionParams, description="Encapsulates the configuration settings and preferences for how the submission is to be processed.", ai=False) results: list[str] = odm.List(odm.Keyword(), store=False, description="A list of identifiers for the results generated by the analysis services.", ai=False) From d14ae1c07831c31de1458483229c0c165aae4d04 Mon Sep 17 00:00:00 2001 From: "Eric L (CCCS)" Date: Fri, 11 Oct 2024 13:22:21 -0400 Subject: [PATCH 05/11] Update assemblyline/odm/models/submission.py Suggestion from code review. 
Co-authored-by: gdesmar <75089569+gdesmar@users.noreply.github.com> --- assemblyline/odm/models/submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index 4a113c1ef..e1980dea3 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -144,7 +144,7 @@ class Submission(odm.Model): archived = odm.Boolean(default=False, description="A boolean indicating whether the submission, including the file(s), has been stored in the malware archive.", ai=False) classification = odm.Classification(description="Security classification level of the submission.") error_count = odm.Integer(description="The total number of errors encountered during the analysis process.", ai=False) - errors: list[str] = odm.List(odm.Keyword(), store=False, description="A list of error identifiers that were triggered throughout the processing of the submission.", ai=False) + errors: list[str] = odm.List(odm.Keyword(), store=False, description="A list of error identifiers that were encountered during the analysis process.", ai=False) expiry_ts = odm.Optional(odm.Date(store=False), description="An optional timestamp specifying when the submission will expire and be purged from the system.", ai=False) file_count = odm.Integer(description="The total number of files included in the submission.", ai=False) files: list[File] = odm.List(odm.Compound(File), description="An array of File objects representing the files submitted for analysis, including their names, sizes, and SHA256 hashes.") From 62d05ae1596f2e59e808936997775c49d1f18a4f Mon Sep 17 00:00:00 2001 From: "Eric L (CCCS)" Date: Fri, 11 Oct 2024 13:41:46 -0400 Subject: [PATCH 06/11] Update assemblyline/odm/models/submission.py Co-authored-by: gdesmar <75089569+gdesmar@users.noreply.github.com> --- assemblyline/odm/models/submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index e1980dea3..1d51f8ca7 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -82,7 +82,7 @@ class SubmissionParams(odm.Model): submitter = odm.Keyword(store=True, copyto="__text__", description="The username of the individual who submitted the file for analysis.") ttl = odm.Integer(default=0, description="**TODO**: **Original**:Time, in days, to live for this submission **Generated**:The time-to-live for the submission, defining how many days it will be retained before expiry.") type = odm.Keyword(default="USER", description="The type of submission (e.g., `USER` for user-submitted), indicating its origin or purpose.") - initial_data = odm.Optional(odm.Text(index=False), description="Initial temporary data provided at the time of submission, which may be used during analysis.") + initial_data = odm.Optional(odm.Text(index=False), description="Content provided at the time of submission to initialize the temporary submission data, which may be used during analysis.") auto_archive = odm.Boolean(default=False, description="Determines whether the submission should automatically be archived upon completion.") delete_after_archive = odm.Boolean( From 687ca749793ffd72970205c516feb18de272324e Mon Sep 17 00:00:00 2001 From: cccs-eric Date: Tue, 15 Oct 2024 08:16:00 -0400 Subject: [PATCH 07/11] Update from review --- assemblyline/odm/models/result.py | 16 ++++++++-------- assemblyline/odm/models/submission.py | 27 +++++++++++++-------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/assemblyline/odm/models/result.py b/assemblyline/odm/models/result.py index cff6bb3c3..7dd41edfd 100644 --- a/assemblyline/odm/models/result.py +++ b/assemblyline/odm/models/result.py @@ -43,7 +43,7 @@ class Attack(odm.Model): class Signature(odm.Model): name = odm.Keyword(copyto="__text__", description="Name of the detection signature that triggered 
the heuristic.") frequency = odm.Integer(default=1, description="The count of how many times this particular signature has triggered the heuristic during analysis.") - safe = odm.Boolean(default=False, description="A boolean indicating whether the signature is considered safe and has been safelisted, thus not contributing to the score. **TODO**:Is the last part about not contributing to the score true?") + safe = odm.Boolean(default=False, description="A boolean indicating whether the signature is considered safe and has been safelisted, thus not contributing to the score.") @odm.model(index=True, store=False, description="Heuristic associated to the Section") @@ -61,16 +61,16 @@ class Section(odm.Model): auto_collapse = odm.Boolean(default=False, description="Indicates whether the section should be initially displayed as collapsed in the user interface.", ai=False) body = odm.Optional(odm.Text(copyto="__text__"), description="The main content of the result section, which may include detailed analysis findings or descriptions.") classification = odm.Classification(description="The classification level assigned to the information within the section, dictating who can view it.", ai=False) - body_format = odm.Enum(values=BODY_FORMAT, index=False, description="**TODO**: **Original**:Type of body in this section **Generated**:The format of the body content, such as text, JSON, or image, which determines how it is displayed.") + body_format = odm.Enum(values=BODY_FORMAT, index=False, description="The format of the body content, such as text, JSON, or image, which determines how it is displayed.") body_config = odm.Optional(odm.Mapping(odm.Any(), index=False), - description="**TODO**: **Original**:Configurations for the body of this section **Generated**:Additional configurations that specify how the body content should be rendered or processed.", ai=False) - depth = odm.Integer(index=False, description="**TODO**: **Original**:Depth of the section **Generated**:The 
nesting level of the section within the overall result hierarchy, used for organizing complex results.", ai=False) + description="Additional configurations that specify how the body content should be rendered or processed.", ai=False) + depth = odm.Integer(index=False, description="The nesting level of the section within the overall result hierarchy, used for organizing complex results.", ai=False) heuristic = odm.Optional(odm.Compound(Heuristic), description="The heuristic analysis that contributed to the scoring of this section, if applicable.") tags = odm.Compound(Tagging, default={}, description="A collection of tags that categorize or label the section based on the analysis findings.") safelisted_tags = odm.FlattenedListObject(store=False, default={}, description="Tags that have been deemed safe and are excluded from contributing to the overall threat score.", ai=False) title_text = odm.Text(copyto="__text__", description="The title of the section, summarizing its content or purpose.") promote_to = odm.Optional(odm.Enum( - values=PROMOTE_TO, ai=False), description="**TODO**: **Original**:This is the type of data that the current section should be promoted to. 
*Generated**:The category of data that this section's content should be elevated to for reporting or further analysis.") + values=PROMOTE_TO, ai=False), description="The category of data that this section's content should be elevated to for reporting or further analysis.") @odm.model(index=True, store=True, description="Result Body") @@ -131,10 +131,10 @@ class Result(odm.Model): response: ResponseBody = odm.compound(ResponseBody, description="The container for all the response data provided by the service after analyzing the file.") result: ResultBody = odm.compound(ResultBody, default={}, description="The container for the detailed results of the analysis, including sections and scores.") sha256 = odm.SHA256(store=False, description="The SHA256 hash of the file that was analyzed, linking the result to the specific artifact.") - type = odm.Optional(odm.Keyword(), description="**TODO**") - size = odm.Optional(odm.Integer(), description="**TODO**") + type = odm.Optional(odm.Keyword(), description="The MIME type or other file classification identified by Assemblyline that is linked to the result, providing insight into the file's content or format.") + size = odm.Optional(odm.Integer(), description="The size (in bytes) of the analyzed file pertinent to the result.") drop_file = odm.Boolean(default=False, description="A flag indicating whether the file should be excluded from subsequent analysis stages.", ai=False) - from_archive = odm.Boolean(index=False, default=False, description="**TODO**: **Original**:Was loaded from the archive **Generated**:Indicates whether the result was retrieved from an archive rather than produced from a recent analysis.", ai=False) + from_archive = odm.Boolean(index=False, default=False, description="Indicates whether the result was retrieved from an archive rather than produced from a recent analysis.", ai=False) def build_key(self, service_tool_version=None, task=None): return self.help_build_key( diff --git 
a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index 4a113c1ef..38d3e143c 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -23,15 +23,14 @@ class File(odm.Model): @odm.model(index=False, store=False, description="""Outlines the services selected for analysis, any excluded services, and any additional services that should be used in the case of rescan or resubmit actions. """) class ServiceSelection(odm.Model): - selected = odm.List(odm.Keyword(), default=DEFAULT_SRV_SEL, description="Services selected to process the submission, which determine the scope of analysis.") - excluded = odm.List(odm.Keyword(), default=[], description="Services explicitly excluded from processing the submission, bypassing their analysis.") + selected = odm.List(odm.Keyword(), default=DEFAULT_SRV_SEL, description="Services and/or service groups selected to process the submission, which determine the scope of analysis.") + excluded = odm.List(odm.Keyword(), default=[], description="Services and/or service groups explicitly excluded from processing the submission, bypassing their analysis.") rescan = odm.List( odm.Keyword(), default=[], - description="Services to be used for a rescan if the submission's initial results are deemed malicious.") + description="Services and/or service groups to be used for a rescan if the submission's initial results are deemed malicious.") resubmit = odm.List(odm.Keyword(), default=DEFAULT_RESUBMIT, description="Additional services that are added to the selection when a submission is resubmitted.") - runtime_excluded = odm.List(odm.Keyword(), default=[], description="**TODO**: **Original**:List of runtime excluded services **Generated**:Services that are excluded during the submission's runtime, possibly due to dynamic decision-making.") # Fields in the parameters used to calculate hashes used for result caching @@ -54,10 +53,10 @@ class ServiceSelection(odm.Model): class 
SubmissionParams(odm.Model): classification = odm.Classification(default=Classification.UNRESTRICTED, description="The initial security classification for the submission, indicating its confidentiality.") - deep_scan = odm.Boolean(default=False, description="**TODO**: **Original**:Should a deep scan be performed? **Generated**:Specifies whether a more intensive scan should be conducted on the submission.") - description = odm.Text(store=True, copyto="__text__", description="A user-provided description or notes regarding the submission.") - generate_alert = odm.Boolean(default=False, description="**TODO**: **Original**:Should this submission generate an alert? **Generated**:Determines if an alert should be generated upon analysis completion.") - groups = odm.List(odm.Keyword(), default=[], description="**TODO**: **Original**:List of groups related to this scan **Generated**:A list of group identifiers relevant to this submission, often used for access control.") + deep_scan = odm.Boolean(default=False, description="Enables a comprehensive examination of the submission by lifting standard safeguards and constraints, utilizing experimental methods and allowing for the exploration of potentially ambiguous findings to maximize the extraction of information.") + description = odm.Text(store=True, copyto="__text__", description="A narrative that outlines the content and purpose of the submission.") + generate_alert = odm.Boolean(default=False, description="Determines if an alert should be generated upon analysis completion.") + groups = odm.List(odm.Keyword(), default=[], description="A list of group identifiers relevant to this submission, often used for access control.") ignore_cache = odm.Boolean(default=False, description="Bypasses any cached results for services, forcing all services to process the submission anew.") ignore_recursion_prevention = odm.Boolean( default=False, description="Overrides the default mechanism that prevents recursive scanning of extracted 
files.") @@ -68,19 +67,19 @@ class SubmissionParams(odm.Model): ignore_filtering = odm.Boolean(default=False, description="**TODO**: **Original**:Should we ignore filtering services? **Generated**:Indicates if filtering services should be skipped, allowing all files to be processed.") ignore_size = odm.Boolean(default=False, description="Allows the submission to bypass any file size restrictions set by the system.") - never_drop = odm.Boolean(default=False, description="**TODO**: **Original**:Exempt from being dropped by ingester? **Generated**:Ensures the submission will not be dropped by the ingestion service, regardless of system load.") + never_drop = odm.Boolean(default=False, description="Ensures the submission will not be dropped by the ingestion service, regardless of system load.") malicious = odm.Boolean(default=False, description="**TODO**: **Original**:Is the file submitted already known to be malicious? **Generated**:Flags the submission as known to be malicious, possibly altering its handling.") max_extracted = odm.Integer(default=500, description="The maximum number of files that can be extracted from the submission for separate analysis.") max_supplementary = odm.Integer(default=500, description="**TODO**: **Original**:Max number of supplementary files **Generated**:The maximum number of supplementary files that can be generated from the submission.") priority = odm.Integer(default=1000, description="**TODO**: **Original**:Priority of the scan **Generated**:The processing priority of the submission, with lower numbers indicating higher priority.") - profile = odm.Boolean(default=False, description="**TODO**: **Original**:Should the submission do extra profiling? 
**Generated**:Triggers additional profiling of the submission for performance and analysis metrics.") + profile = odm.Boolean(default=False, description="Triggers additional profiling of the submission for performance and analysis metrics.") psid = odm.Optional(odm.UUID(), description="The ID of a parent submission, if this submission is related to or derived from another.") quota_item = odm.Boolean(default=False, description="Indicates if this submission should count against the submitter's quota.") services = odm.Compound(ServiceSelection, default={}, description="Details which services are selected or excluded from processing this submission.") service_spec = odm.Mapping(odm.Mapping(odm.Any()), default={}, index=False, store=False, description="A dictionary specifying service-specific parameters that may alter service behavior for this submission.") - submitter = odm.Keyword(store=True, copyto="__text__", description="The username of the individual who submitted the file for analysis.") - ttl = odm.Integer(default=0, description="**TODO**: **Original**:Time, in days, to live for this submission **Generated**:The time-to-live for the submission, defining how many days it will be retained before expiry.") + submitter = odm.Keyword(store=True, copyto="__text__", description="Name of the account that submitted the file for analysis.") + ttl = odm.Integer(default=0, description="The time-to-live for the submission, defining how many days it will be retained before expiry.") type = odm.Keyword(default="USER", description="The type of submission (e.g., `USER` for user-submitted), indicating its origin or purpose.") initial_data = odm.Optional(odm.Text(index=False), description="Initial temporary data provided at the time of submission, which may be used during analysis.") auto_archive = odm.Boolean(default=False, @@ -89,7 +88,7 @@ class SubmissionParams(odm.Model): default=False, description="Specifies if the submission data should be deleted from active storage 
immediately after archiving.") use_archive_alternate_dtl = odm.Boolean(default=False, - description="**TODO**: **Original**:Should we use the alternate dtl while archiving? **Generated**:Indicates if an alternate data lifetime should be applied to the submission once archived.") + description="Indicates if an alternate data lifetime should be applied to the submission once archived.") def get_hashing_keys(self): """Get the sections of the submission parameters that should be used in result hashes.""" @@ -124,7 +123,7 @@ def create_filescore_key(self, sha256, services: list = None): """) class Times(odm.Model): completed = odm.Optional(odm.Date(store=False), description="Timestamp recording when the analysis of the submission was completed.") - submitted = odm.Date(default="NOW", description="Timestamp marking when the submission was initiated and began the analysis process.") + submitted = odm.Date(default="NOW", description="Timestamp recording when the submission was initiated and began the analysis process.") @odm.model(index=True, store=False, description="""Reflects the consensus on whether the submission is deemed malicious or non-malicious based on user input. 
From 6d55f9f355910515623f3a8d7f6b0e29e7afdd91 Mon Sep 17 00:00:00 2001 From: "Eric L (CCCS)" Date: Thu, 17 Oct 2024 10:56:28 -0400 Subject: [PATCH 08/11] Update assemblyline/odm/models/submission.py Co-authored-by: gdesmar <75089569+gdesmar@users.noreply.github.com> --- assemblyline/odm/models/submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index f1e8459aa..03e3fd458 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -73,7 +73,7 @@ class SubmissionParams(odm.Model): max_supplementary = odm.Integer(default=500, description="**TODO**: **Original**:Max number of supplementary files **Generated**:The maximum number of supplementary files that can be generated from the submission.") priority = odm.Integer(default=1000, description="**TODO**: **Original**:Priority of the scan **Generated**:The processing priority of the submission, with lower numbers indicating higher priority.") profile = odm.Boolean(default=False, description="Triggers additional profiling of the submission for performance and analysis metrics.") - psid = odm.Optional(odm.UUID(), description="The ID of a parent submission, if this submission is related to or derived from another.") + psid = odm.Optional(odm.UUID(), description="The ID of a parent submission, if this submission is related to an extension of another.") quota_item = odm.Boolean(default=False, description="Indicates if this submission should count against the submitter's quota.") services = odm.Compound(ServiceSelection, default={}, description="Details which services are selected or excluded from processing this submission.") service_spec = odm.Mapping(odm.Mapping(odm.Any()), default={}, index=False, store=False, From c9e508c524ced01d4f531e4af9df13baf29e1150 Mon Sep 17 00:00:00 2001 From: cccs-eric Date: Thu, 17 Oct 2024 11:07:58 -0400 Subject: [PATCH 09/11] Update classification and 
verdict description --- assemblyline/odm/models/submission.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index f1e8459aa..601fcd1ff 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -52,7 +52,7 @@ class ServiceSelection(odm.Model): """) class SubmissionParams(odm.Model): classification = odm.Classification(default=Classification.UNRESTRICTED, - description="The initial security classification for the submission, indicating its confidentiality.") + description="The classification level assigned to the submission, signifying its sensitivity and dictating access controls.") deep_scan = odm.Boolean(default=False, description="Enables a comprehensive examination of the submission by lifting standard safeguards and constraints, utilizing experimental methods and allowing for the exploration of potentially ambiguous findings to maximize the extraction of information.") description = odm.Text(store=True, copyto="__text__", description="A narrative that outlines the content and purpose of the submission.") generate_alert = odm.Boolean(default=False, description="Determines if an alert should be generated upon analysis completion.") @@ -126,7 +126,7 @@ class Times(odm.Model): submitted = odm.Date(default="NOW", description="Timestamp recording when the submission was initiated and began the analysis process.") -@odm.model(index=True, store=False, description="""Reflects the consensus on whether the submission is deemed malicious or non-malicious based on user input. +@odm.model(index=True, store=False, description="""Records user feedback regarding the perceived maliciousness of a submission, serving as an aggregation of user opinions rather than the definitive verdict of the system. 
""") class Verdict(odm.Model): malicious = odm.List(odm.Keyword(), default=[], description="A list of usernames representing users who have judged the submission to be malicious.") From e4eeaccbd1d635f5c94b50e032b37af2345bfe8b Mon Sep 17 00:00:00 2001 From: "Eric L (CCCS)" Date: Thu, 17 Oct 2024 14:06:44 -0400 Subject: [PATCH 10/11] Update assemblyline/odm/models/submission.py --- assemblyline/odm/models/submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index 03e3fd458..96b8b29b6 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -68,7 +68,7 @@ class SubmissionParams(odm.Model): ignore_filtering = odm.Boolean(default=False, description="**TODO**: **Original**:Should we ignore filtering services? **Generated**:Indicates if filtering services should be skipped, allowing all files to be processed.") ignore_size = odm.Boolean(default=False, description="Allows the submission to bypass any file size restrictions set by the system.") never_drop = odm.Boolean(default=False, description="Ensures the submission will not be dropped by the ingestion service, regardless of system load.") - malicious = odm.Boolean(default=False, description="**TODO**: **Original**:Is the file submitted already known to be malicious? 
**Generated**:Flags the submission as known to be malicious, possibly altering its handling.") + malicious = odm.Boolean(default=False, description="If set to true, the system acknowledges the user's assertion that the file is malicious by automatically casting a malicious vote for the submission.") max_extracted = odm.Integer(default=500, description="The maximum number of files that can be extracted from the submission for separate analysis.") max_supplementary = odm.Integer(default=500, description="**TODO**: **Original**:Max number of supplementary files **Generated**:The maximum number of supplementary files that can be generated from the submission.") priority = odm.Integer(default=1000, description="**TODO**: **Original**:Priority of the scan **Generated**:The processing priority of the submission, with lower numbers indicating higher priority.") From b9d053ad8750dc3e9141399422bca5cbe9b4cf9b Mon Sep 17 00:00:00 2001 From: cccs-eric Date: Thu, 17 Oct 2024 14:14:00 -0400 Subject: [PATCH 11/11] Fix psid description --- assemblyline/odm/models/submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index f9f961b07..e57d6be63 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -73,7 +73,7 @@ class SubmissionParams(odm.Model): max_supplementary = odm.Integer(default=500, description="**TODO**: **Original**:Max number of supplementary files **Generated**:The maximum number of supplementary files that can be generated from the submission.") priority = odm.Integer(default=1000, description="**TODO**: **Original**:Priority of the scan **Generated**:The processing priority of the submission, with lower numbers indicating higher priority.") profile = odm.Boolean(default=False, description="Triggers additional profiling of the submission for performance and analysis metrics.") - psid = odm.Optional(odm.UUID(), 
description="The ID of a parent submission, if this submission is related to an extension of another.") + psid = odm.Optional(odm.UUID(), description="The ID of a parent submission when the current submission is a follow-up or an extension of the parent submission.") quota_item = odm.Boolean(default=False, description="Indicates if this submission should count against the submitter's quota.") services = odm.Compound(ServiceSelection, default={}, description="Details which services are selected or excluded from processing this submission.") service_spec = odm.Mapping(odm.Mapping(odm.Any()), default={}, index=False, store=False,