From 25fbbeb076cf06958f0b33254c80cd311ecc1821 Mon Sep 17 00:00:00 2001 From: Adam Douglass Date: Thu, 27 Feb 2025 16:39:51 +0000 Subject: [PATCH 1/8] minor cleanup --- assemblyline/datastore/collection.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/assemblyline/datastore/collection.py b/assemblyline/datastore/collection.py index cb0838443..43ebfd3a2 100644 --- a/assemblyline/datastore/collection.py +++ b/assemblyline/datastore/collection.py @@ -15,7 +15,7 @@ from datetime import datetime from enum import Enum from os import environ -from typing import Dict, Any, Union, TypeVar, Generic +from typing import Dict, Any, Union, TypeVar, Generic, Optional import elasticsearch import elasticsearch.helpers @@ -222,8 +222,8 @@ def __init__(self, datastore: ESStore, name, model_class=None, validate=True, ar if field.store: self.stored_fields[name] = field - def is_archive_index(self, index): - return self.archive_name and index.startswith(self.archive_name) + def is_archive_index(self, index) -> bool: + return bool(self.archive_name and index.startswith(self.archive_name)) def get_index_list(self, index_type): # Default value @@ -2032,17 +2032,17 @@ def __get_possible_fields(self, field): return field_types - def _check_fields(self, model=None): + def _check_fields(self, target_model: Optional[odm.Model] = None): if not self.validate: return - if model is None: + if target_model is None: if self.model_class: return self._check_fields(self.model_class) return fields = self.fields() - model = self.model_class.flat_fields(skip_mappings=True) + model = target_model.flat_fields(skip_mappings=True) missing = set(model.keys()) - set(fields.keys()) if missing: @@ -2071,7 +2071,7 @@ def _ensure_collection(self): index = f"{alias}_hot" # Create HOT index if not self.with_retries(self.datastore.client.indices.exists, index=alias): - log.debug(f"Index {alias.upper()} does not exists. Creating it now...") + log.debug("Index %s does not exists. Creating it now...", alias.upper()) try: self.with_retries(self.datastore.client.indices.create, index=index, mappings=self._get_index_mappings(), @@ -2079,7 +2079,7 @@ def _ensure_collection(self): except elasticsearch.exceptions.RequestError as e: if "resource_already_exists_exception" not in str(e): raise - log.warning(f"Tried to create an index template that already exists: {alias.upper()}") + log.warning("Tried to create an index template that already exists: %s", alias.upper()) self.with_retries(self.datastore.client.indices.put_alias, index=index, name=alias) elif not self.with_retries(self.datastore.client.indices.exists, index=index) and \ From 24f728f64bfc6ca35697fd59387f1edb55814624 Mon Sep 17 00:00:00 2001 From: Adam Douglass Date: Thu, 27 Feb 2025 18:00:50 +0000 Subject: [PATCH 2/8] support wildcard and long the way we want --- assemblyline/datastore/support/build.py | 8 ++- assemblyline/odm/__init__.py | 7 ++- assemblyline/odm/base.py | 61 +++++++++--------- test/test_odm_mapping.py | 83 +++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 33 deletions(-) create mode 100644 test/test_odm_mapping.py diff --git a/assemblyline/datastore/support/build.py b/assemblyline/datastore/support/build.py index 4f713100b..1b0af6f5c 100644 --- a/assemblyline/datastore/support/build.py +++ b/assemblyline/datastore/support/build.py @@ -1,5 +1,5 @@ from assemblyline.odm.base import _Field -from assemblyline.odm import Keyword, Text, List, Compound, Date, Integer, Long, \ +from assemblyline.odm import Keyword, Wildcard, Text, List, Compound, Date, Integer, Long, \ Float, Boolean, Mapping, Classification, Enum, Any, UUID, Optional, IP, Domain, URI, URIPath, MAC, PhoneNumber, \ SSDeepHash, SHA1, SHA256, MD5, Platform, Processor, ClassificationString, FlattenedObject, Email, UpperKeyword, \ Json, ValidatedKeyword, UNCPath @@ -7,6 +7,7 @@ # Simple types can be resolved by a direct mapping __type_mapping = { Keyword: 'keyword', + Wildcard: 'wildcard', Boolean: 'boolean', Integer: 'integer', Long: 'long', @@ -111,6 +112,11 @@ def set_mapping(temp_field: _Field, body): "analyzer": __analyzer_mapping[field.__class__] }) + elif isinstance(field, Wildcard): + es_data_type = __type_mapping[field.__class__] + data = {'type': es_data_type} + mappings[name.strip(".")] = data + elif isinstance(field, Keyword): es_data_type = __type_mapping[field.__class__] data = {'type': es_data_type} diff --git a/assemblyline/odm/__init__.py b/assemblyline/odm/__init__.py index 88bef276a..6a99ec36c 100644 --- a/assemblyline/odm/__init__.py +++ b/assemblyline/odm/__init__.py @@ -5,7 +5,8 @@ # Imports that have the same effect as some part of the one above so that # type checking can use this file properly. -from assemblyline.odm.base import Keyword, Optional, Boolean, Integer, List, Compound, Mapping, Date, Long, Enum +from assemblyline.odm.base import Keyword, Optional, Boolean, Integer, List, Compound, Mapping, \ + Date, Long, Enum, Wildcard from datetime import datetime _InnerType = typing.TypeVar("_InnerType") @@ -27,6 +28,10 @@ def keyword(*args, **kwargs) -> str: return typing.cast(str, Keyword(*args, **kwargs)) +def wildcard(*args, **kwargs) -> str: + return typing.cast(str, Wildcard(*args, **kwargs)) + + def date(*args, **kwargs) -> datetime: return typing.cast(datetime, Date(*args, **kwargs)) diff --git a/assemblyline/odm/base.py b/assemblyline/odm/base.py index 6a3ff54ff..7b5928626 100644 --- a/assemblyline/odm/base.py +++ b/assemblyline/odm/base.py @@ -19,6 +19,7 @@ import sys import unicodedata from datetime import datetime +import typing from typing import Any as _Any from typing import Dict, Tuple, Union @@ -290,6 +291,31 @@ def check(self, value, **kwargs): return str(value) +class Wildcard(Keyword): + """ + A keyword with enhanced indexing to support more complex queries. + """ + + def check(self, value, **kwargs): + if self.optional and value is None: + return None + + # We have a special case for bytes here due to how often strings and bytes + # get mixed up in python apis + if isinstance(value, bytes): + raise ValueError(f"[{self.name or self.parent_name}] Keyword doesn't accept bytes values") + + if value == '' or value is None: + if self.default_set: + value = self.default + else: + raise ValueError(f"[{self.name or self.parent_name}] Empty strings are not allowed without defaults") + + if value is None: + return None + + return str(value) + class EmptyableKeyword(_Field): """ A keyword which allow to differentiate between empty and None values. @@ -638,9 +664,9 @@ def check(self, value, **kwargs): class Integer(_Field): - """A field storing an integer value.""" + """A field storing a signed 32 bit integer value.""" - def __init__(self, max: int = None, min: int = None, *args, **kwargs): + def __init__(self, max: typing.Optional[int] = None, min: typing.Optional[int] = None, *args, **kwargs): super().__init__(*args, **kwargs) self.max = max self.min = min @@ -668,35 +694,8 @@ def check(self, value, **kwargs): return ret_val -class Long(_Field): - """A field storing an integer value.""" - - def __init__(self, max: int = None, min: int = None, *args, **kwargs): - super().__init__(*args, **kwargs) - self.max = max - self.min = min - - def check(self, value, **kwargs): - if self.optional and value is None: - return None - - if value is None or value == "": - if self.default_set: - ret_val = self.default - else: - raise ValueError(f"[{self.name or self.parent_name}] No value provided and no default value set.") - else: - ret_val = int(value) - - # Test min/max - if self.max is not None and ret_val > self.max: - raise ValueError( - f"[{self.name or self.parent_name}] Value bigger then the max value. ({value} > {self.max})") - if self.min is not None and ret_val < self.min: - raise ValueError( - f"[{self.name or self.parent_name}] Value smaller then the min value. ({value} < {self.max})") - - return ret_val +class Long(Integer): + """A field storing a signed 64 bit integer value.""" class Float(_Field): diff --git a/test/test_odm_mapping.py b/test/test_odm_mapping.py new file mode 100644 index 000000000..236047163 --- /dev/null +++ b/test/test_odm_mapping.py @@ -0,0 +1,83 @@ +from assemblyline import odm +from assemblyline.datastore.collection import ESCollection +from assemblyline.datastore.support.build import build_mapping + + +@odm.model(index=True) +class OdmTestMapping1(odm.Model): + stable_text_field = odm.keyword() + swapped_text_field = odm.keyword() + stable_number_field = odm.integer() + swapped_number_field = odm.integer() + + +@odm.model(index=True) +class OdmTestMapping2(odm.Model): + stable_text_field = odm.keyword() + swapped_text_field = odm.wildcard() + stable_number_field = odm.integer() + swapped_number_field = odm.long() + + +def test_example_mapping_type(): + """Test that the example models produce the expected mapping types""" + properties, dynamic = build_mapping(OdmTestMapping1.fields().values()) + + # There should be no dynamic mappings, just one rule forbidding implicit mappings + assert len(dynamic) == 1 + assert 'refuse_all_implicit_mappings' in dynamic[0] + + # Check that the static fields have the mapping type we want + assert len(properties) == 4 + assert properties['stable_text_field']['type'] == 'keyword' + assert properties['swapped_text_field']['type'] == 'keyword' + assert properties['stable_number_field']['type'] == 'integer' + assert properties['swapped_number_field']['type'] == 'integer' + + properties, dynamic = build_mapping(OdmTestMapping2.fields().values()) + + # There should be no dynamic mappings, just one rule forbidding implicit mappings + assert len(dynamic) == 1 + assert 'refuse_all_implicit_mappings' in dynamic[0] + + # Check that the static fields have the mapping type we want + assert len(properties) == 4 + assert properties['stable_text_field']['type'] == 'keyword' + assert properties['swapped_text_field']['type'] == 'wildcard' + assert properties['stable_number_field']['type'] == 'integer' + assert properties['swapped_number_field']['type'] == 'long' + + +def test_field_upgrade_ok(datastore_connection): + """Test that changing a field from keyword to wildcard doesn't break anything.""" + # Clean up from any previous runs + collection = ESCollection(datastore_connection.ds, "testmapping", OdmTestMapping1, validate=False) + collection.wipe(recreate=False) + + # Create the collection in elastic + collection = ESCollection(datastore_connection.ds, "testmapping", OdmTestMapping1, validate=True) + properties = collection.fields() + assert properties['stable_text_field']['type'] == 'keyword' + assert properties['swapped_text_field']['type'] == 'keyword' + assert properties['stable_number_field']['type'] == 'integer' + assert properties['swapped_number_field']['type'] == 'integer' + + # Open that same collection using the new mapping + collection = ESCollection(datastore_connection.ds, "testmapping", OdmTestMapping2, validate=True) + + # Check that the fields haven't changed + properties = collection.fields() + assert properties['stable_text_field']['type'] == 'keyword' + assert properties['swapped_text_field']['type'] == 'keyword' + assert properties['stable_number_field']['type'] == 'integer' + assert properties['swapped_number_field']['type'] == 'integer' + + # Reindex + collection.reindex() + + # Check that the fields match the new model + properties = collection.fields() + assert properties['stable_text_field']['type'] == 'keyword' + assert properties['swapped_text_field']['type'] == 'wildcard' + assert properties['stable_number_field']['type'] == 'integer' + assert properties['swapped_number_field']['type'] == 'long' From 27ba72975d4e7715de906acfb41a318882bd2606 Mon Sep 17 00:00:00 2001 From: Adam Douglass Date: Thu, 27 Feb 2025 18:08:13 +0000 Subject: [PATCH 3/8] switch file sizes to long --- assemblyline/odm/messages/task.py | 2 +- assemblyline/odm/models/alert.py | 2 +- assemblyline/odm/models/badlist.py | 2 +- assemblyline/odm/models/file.py | 2 +- assemblyline/odm/models/safelist.py | 2 +- assemblyline/odm/models/submission.py | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/assemblyline/odm/messages/task.py b/assemblyline/odm/messages/task.py index 27e63253e..253471b33 100644 --- a/assemblyline/odm/messages/task.py +++ b/assemblyline/odm/messages/task.py @@ -15,7 +15,7 @@ class FileInfo(odm.Model): mime = odm.Optional(odm.Keyword(), description="The libmagic mime type") sha1 = odm.SHA1(description="SHA1 hash of the file") sha256 = odm.SHA256(description="SHA256 hash of the file") - size = odm.Integer(description="Size of the file in bytes") + size = odm.long(description="Size of the file in bytes") ssdeep = odm.Optional(odm.SSDeepHash(description="SSDEEP hash of the file")) tlsh = odm.Optional(odm.Keyword(description="TLSH hash of the file")) type = odm.Keyword(description="Type of file as identified by Assemblyline") diff --git a/assemblyline/odm/models/alert.py b/assemblyline/odm/models/alert.py index affb0a8a6..904646cbf 100644 --- a/assemblyline/odm/models/alert.py +++ b/assemblyline/odm/models/alert.py @@ -115,7 +115,7 @@ class File(odm.Model): name = odm.Keyword(copyto="__text__", description="The original name of the file as submitted.") sha1 = odm.SHA1(copyto="__text__", description="The SHA1 hash of the file.") sha256 = odm.SHA256(copyto="__text__", description="The SHA256 hash of the file.") - size = odm.Integer(store=False, description="The size of the file in bytes.") + size = odm.long(store=False, description="The size of the file in bytes.") type = odm.Keyword(copyto="__text__", description=" The file type as identified by Assemblyline's analysis.") screenshots = odm.List(odm.Compound(Screenshot), default=[], description="Screenshots taken of the file during analysis, if applicable.") diff --git a/assemblyline/odm/models/badlist.py b/assemblyline/odm/models/badlist.py index 117f6c08b..a557bfd5e 100644 --- a/assemblyline/odm/models/badlist.py +++ b/assemblyline/odm/models/badlist.py @@ -30,7 +30,7 @@ class Hashes(odm.Model): class File(odm.Model): name = odm.List(odm.Keyword(store=True, copyto="__text__"), default=[], description="List of names seen for that file") - size = odm.Optional(odm.Integer(), description="Size of the file in bytes") + size = odm.Optional(odm.long(), description="Size of the file in bytes") type = odm.Optional(odm.Keyword(), description="Type of file as identified by Assemblyline") diff --git a/assemblyline/odm/models/file.py b/assemblyline/odm/models/file.py index aa433b086..1948a6e00 100644 --- a/assemblyline/odm/models/file.py +++ b/assemblyline/odm/models/file.py @@ -102,7 +102,7 @@ class File(odm.Model): seen = odm.Compound(Seen, default={}, description="Records the frequency and timestamps of when the file was encountered.", ai=False) sha1 = odm.SHA1(copyto="__text__", description="The SHA1 hash of the file, providing a more secure alternative to MD5 for integrity checks.", ai=False) sha256 = odm.SHA256(copyto="__text__", description="The SHA256 hash of the file, offering a high level of security for integrity verification.") - size = odm.Integer(description="Size of the file in bytes.") + size = odm.long(description="Size of the file in bytes.") ssdeep = odm.SSDeepHash(store=False, description="The fuzzy hash of the file using SSDEEP, which is useful for identifying similar files.", ai=False) type = odm.Keyword(copyto="__text__", description="The file type as determined by the AssemblyLine file type identification service.") tlsh = odm.Optional(odm.Keyword(copyto="__text__"), description="A locality-sensitive hash (TLSH) of the file's content, useful for similarity comparisons.", ai=False) diff --git a/assemblyline/odm/models/safelist.py b/assemblyline/odm/models/safelist.py index 0d6329efe..81900e8d0 100644 --- a/assemblyline/odm/models/safelist.py +++ b/assemblyline/odm/models/safelist.py @@ -17,7 +17,7 @@ class Hashes(odm.Model): class File(odm.Model): name = odm.List(odm.Keyword(store=True, copyto="__text__"), default=[], description="List of names seen for that file") - size = odm.Optional(odm.Integer(), description="Size of the file in bytes") + size = odm.Optional(odm.long(), description="Size of the file in bytes") type = odm.Optional(odm.Keyword(), description="Type of file as identified by Assemblyline") diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index 23615a4a0..66bceebb3 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -12,7 +12,7 @@ @odm.model(index=True, store=False, description="File Model of Submission") class File(odm.Model): name = odm.Keyword(copyto="__text__", description="Name of the file") - size = odm.Optional(odm.Integer(), description="Size of the file in bytes") + size = odm.Optional(odm.long(), description="Size of the file in bytes") sha256 = odm.SHA256(copyto="__text__", description="SHA256 hash of the file") @@ -142,7 +142,7 @@ class Submission(odm.Model): max_score = odm.Integer(description="Maximum score of all the files in the scan") metadata = odm.FlattenedObject(store=False, description="Metadata associated to the submission") params: SubmissionParams = odm.Compound(SubmissionParams, description="Submission parameter details", ai=False) - results: list[str] = odm.List(odm.Keyword(), store=False, description="List of result keys", ai=False) + results: list[str] = odm.List(odm.wildcard(), store=False, description="List of result keys", ai=False) sid: str = odm.UUID(copyto="__text__", description="Submission ID") state = odm.Enum(values=SUBMISSION_STATES, description="Status of the submission", ai=False) to_be_deleted = odm.Boolean( From f1815b5cb1ed298c3d30ee29ccf245eb5a7a4f09 Mon Sep 17 00:00:00 2001 From: Adam Douglass Date: Thu, 27 Feb 2025 19:16:10 +0000 Subject: [PATCH 4/8] also file size config --- assemblyline/odm/models/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyline/odm/models/config.py b/assemblyline/odm/models/config.py index b9d767a1f..25f3fced0 100644 --- a/assemblyline/odm/models/config.py +++ b/assemblyline/odm/models/config.py @@ -2014,7 +2014,7 @@ class Submission(odm.Model): emptyresult_dtl: int = odm.Integer(min=0, description="Number of days emptyresult will remain in the system") max_dtl: int = odm.Integer(min=0, description="Maximum number of days submissions will remain in the system") max_extraction_depth: int = odm.Integer(description="Maximum files extraction depth") - max_file_size: int = odm.Integer(description="Maximum size for files submitted in the system") + max_file_size: int = odm.long(description="Maximum size for files submitted in the system") max_metadata_length: int = odm.Integer(description="Maximum length for each metadata values") max_temp_data_length: int = odm.Integer(description="Maximum length for each temporary data values") metadata: MetadataConfig = odm.Compound(MetadataConfig, default=DEFAULT_METADATA_CONFIGURATION, From 9a6ec30dfa0610c5e73e40c8d090600dba90a0b5 Mon Sep 17 00:00:00 2001 From: Adam Douglass Date: Fri, 28 Feb 2025 15:34:06 +0000 Subject: [PATCH 5/8] remove model changes --- assemblyline/datastore/collection.py | 8 ++++---- assemblyline/odm/messages/task.py | 2 +- assemblyline/odm/models/alert.py | 2 +- assemblyline/odm/models/badlist.py | 2 +- assemblyline/odm/models/file.py | 2 +- assemblyline/odm/models/safelist.py | 2 +- assemblyline/odm/models/submission.py | 4 ++-- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/assemblyline/datastore/collection.py b/assemblyline/datastore/collection.py index 43ebfd3a2..4f64dd78a 100644 --- a/assemblyline/datastore/collection.py +++ b/assemblyline/datastore/collection.py @@ -8,19 +8,19 @@ import typing import warnings -from copy import deepcopy -from assemblyline.common.isotime import now_as_iso -from datemath import dm -from datemath.helpers import DateMathException from datetime import datetime from enum import Enum from os import environ from typing import Dict, Any, Union, TypeVar, Generic, Optional +from copy import deepcopy +from datemath import dm +from datemath.helpers import DateMathException import elasticsearch import elasticsearch.helpers from assemblyline import odm +from assemblyline.common.isotime import now_as_iso from assemblyline.common.dict_utils import recursive_update from assemblyline.datastore.bulk import ElasticBulkPlan from assemblyline.datastore.exceptions import ( diff --git a/assemblyline/odm/messages/task.py b/assemblyline/odm/messages/task.py index 253471b33..27e63253e 100644 --- a/assemblyline/odm/messages/task.py +++ b/assemblyline/odm/messages/task.py @@ -15,7 +15,7 @@ class FileInfo(odm.Model): mime = odm.Optional(odm.Keyword(), description="The libmagic mime type") sha1 = odm.SHA1(description="SHA1 hash of the file") sha256 = odm.SHA256(description="SHA256 hash of the file") - size = odm.long(description="Size of the file in bytes") + size = odm.Integer(description="Size of the file in bytes") ssdeep = odm.Optional(odm.SSDeepHash(description="SSDEEP hash of the file")) tlsh = odm.Optional(odm.Keyword(description="TLSH hash of the file")) type = odm.Keyword(description="Type of file as identified by Assemblyline") diff --git a/assemblyline/odm/models/alert.py b/assemblyline/odm/models/alert.py index 904646cbf..affb0a8a6 100644 --- a/assemblyline/odm/models/alert.py +++ b/assemblyline/odm/models/alert.py @@ -115,7 +115,7 @@ class File(odm.Model): name = odm.Keyword(copyto="__text__", description="The original name of the file as submitted.") sha1 = odm.SHA1(copyto="__text__", description="The SHA1 hash of the file.") sha256 = odm.SHA256(copyto="__text__", description="The SHA256 hash of the file.") - size = odm.long(store=False, description="The size of the file in bytes.") + size = odm.Integer(store=False, description="The size of the file in bytes.") type = odm.Keyword(copyto="__text__", description=" The file type as identified by Assemblyline's analysis.") screenshots = odm.List(odm.Compound(Screenshot), default=[], description="Screenshots taken of the file during analysis, if applicable.") diff --git a/assemblyline/odm/models/badlist.py b/assemblyline/odm/models/badlist.py index a557bfd5e..117f6c08b 100644 --- a/assemblyline/odm/models/badlist.py +++ b/assemblyline/odm/models/badlist.py @@ -30,7 +30,7 @@ class Hashes(odm.Model): class File(odm.Model): name = odm.List(odm.Keyword(store=True, copyto="__text__"), default=[], description="List of names seen for that file") - size = odm.Optional(odm.long(), description="Size of the file in bytes") + size = odm.Optional(odm.Integer(), description="Size of the file in bytes") type = odm.Optional(odm.Keyword(), description="Type of file as identified by Assemblyline") diff --git a/assemblyline/odm/models/file.py b/assemblyline/odm/models/file.py index 1948a6e00..aa433b086 100644 --- a/assemblyline/odm/models/file.py +++ b/assemblyline/odm/models/file.py @@ -102,7 +102,7 @@ class File(odm.Model): seen = odm.Compound(Seen, default={}, description="Records the frequency and timestamps of when the file was encountered.", ai=False) sha1 = odm.SHA1(copyto="__text__", description="The SHA1 hash of the file, providing a more secure alternative to MD5 for integrity checks.", ai=False) sha256 = odm.SHA256(copyto="__text__", description="The SHA256 hash of the file, offering a high level of security for integrity verification.") - size = odm.long(description="Size of the file in bytes.") + size = odm.Integer(description="Size of the file in bytes.") ssdeep = odm.SSDeepHash(store=False, description="The fuzzy hash of the file using SSDEEP, which is useful for identifying similar files.", ai=False) type = odm.Keyword(copyto="__text__", description="The file type as determined by the AssemblyLine file type identification service.") tlsh = odm.Optional(odm.Keyword(copyto="__text__"), description="A locality-sensitive hash (TLSH) of the file's content, useful for similarity comparisons.", ai=False) diff --git a/assemblyline/odm/models/safelist.py b/assemblyline/odm/models/safelist.py index 81900e8d0..0d6329efe 100644 --- a/assemblyline/odm/models/safelist.py +++ b/assemblyline/odm/models/safelist.py @@ -17,7 +17,7 @@ class Hashes(odm.Model): class File(odm.Model): name = odm.List(odm.Keyword(store=True, copyto="__text__"), default=[], description="List of names seen for that file") - size = odm.Optional(odm.long(), description="Size of the file in bytes") + size = odm.Optional(odm.Integer(), description="Size of the file in bytes") type = odm.Optional(odm.Keyword(), description="Type of file as identified by Assemblyline") diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index 66bceebb3..23615a4a0 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -12,7 +12,7 @@ @odm.model(index=True, store=False, description="File Model of Submission") class File(odm.Model): name = odm.Keyword(copyto="__text__", description="Name of the file") - size = odm.Optional(odm.long(), description="Size of the file in bytes") + size = odm.Optional(odm.Integer(), description="Size of the file in bytes") sha256 = odm.SHA256(copyto="__text__", description="SHA256 hash of the file") @@ -142,7 +142,7 @@ class Submission(odm.Model): max_score = odm.Integer(description="Maximum score of all the files in the scan") metadata = odm.FlattenedObject(store=False, description="Metadata associated to the submission") params: SubmissionParams = odm.Compound(SubmissionParams, description="Submission parameter details", ai=False) - results: list[str] = odm.List(odm.wildcard(), store=False, description="List of result keys", ai=False) + results: list[str] = odm.List(odm.Keyword(), store=False, description="List of result keys", ai=False) sid: str = odm.UUID(copyto="__text__", description="Submission ID") state = odm.Enum(values=SUBMISSION_STATES, description="Status of the submission", ai=False) to_be_deleted = odm.Boolean( From 4c99ea5b5deee46b7c7ed4e7b680003cd273c1f5 Mon Sep 17 00:00:00 2001 From: Adam Douglass Date: Mon, 3 Mar 2025 16:57:26 +0000 Subject: [PATCH 6/8] changes to the mappings --- assemblyline/odm/models/alert.py | 2 +- assemblyline/odm/models/badlist.py | 2 +- assemblyline/odm/models/file.py | 2 +- assemblyline/odm/models/safelist.py | 2 +- assemblyline/odm/models/submission.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/assemblyline/odm/models/alert.py b/assemblyline/odm/models/alert.py index affb0a8a6..904646cbf 100644 --- a/assemblyline/odm/models/alert.py +++ b/assemblyline/odm/models/alert.py @@ -115,7 +115,7 @@ class File(odm.Model): name = odm.Keyword(copyto="__text__", description="The original name of the file as submitted.") sha1 = odm.SHA1(copyto="__text__", description="The SHA1 hash of the file.") sha256 = odm.SHA256(copyto="__text__", description="The SHA256 hash of the file.") - size = odm.Integer(store=False, description="The size of the file in bytes.") + size = odm.long(store=False, description="The size of the file in bytes.") type = odm.Keyword(copyto="__text__", description=" The file type as identified by Assemblyline's analysis.") screenshots = odm.List(odm.Compound(Screenshot), default=[], description="Screenshots taken of the file during analysis, if applicable.") diff --git a/assemblyline/odm/models/badlist.py b/assemblyline/odm/models/badlist.py index 117f6c08b..a557bfd5e 100644 --- a/assemblyline/odm/models/badlist.py +++ b/assemblyline/odm/models/badlist.py @@ -30,7 +30,7 @@ class Hashes(odm.Model): class File(odm.Model): name = odm.List(odm.Keyword(store=True, copyto="__text__"), default=[], description="List of names seen for that file") - size = odm.Optional(odm.Integer(), description="Size of the file in bytes") + size = odm.Optional(odm.long(), description="Size of the file in bytes") type = odm.Optional(odm.Keyword(), description="Type of file as identified by Assemblyline") diff --git a/assemblyline/odm/models/file.py b/assemblyline/odm/models/file.py index aa433b086..1948a6e00 100644 --- a/assemblyline/odm/models/file.py +++ b/assemblyline/odm/models/file.py @@ -102,7 +102,7 @@ class File(odm.Model): seen = odm.Compound(Seen, default={}, description="Records the frequency and timestamps of when the file was encountered.", ai=False) sha1 = odm.SHA1(copyto="__text__", description="The SHA1 hash of the file, providing a more secure alternative to MD5 for integrity checks.", ai=False) sha256 = odm.SHA256(copyto="__text__", description="The SHA256 hash of the file, offering a high level of security for integrity verification.") - size = odm.Integer(description="Size of the file in bytes.") + size = odm.long(description="Size of the file in bytes.") ssdeep = odm.SSDeepHash(store=False, description="The fuzzy hash of the file using SSDEEP, which is useful for identifying similar files.", ai=False) type = odm.Keyword(copyto="__text__", description="The file type as determined by the AssemblyLine file type identification service.") tlsh = odm.Optional(odm.Keyword(copyto="__text__"), description="A locality-sensitive hash (TLSH) of the file's content, useful for similarity comparisons.", ai=False) diff --git a/assemblyline/odm/models/safelist.py b/assemblyline/odm/models/safelist.py index 0d6329efe..81900e8d0 100644 --- a/assemblyline/odm/models/safelist.py +++ b/assemblyline/odm/models/safelist.py @@ -17,7 +17,7 @@ class Hashes(odm.Model): class File(odm.Model): name = odm.List(odm.Keyword(store=True, copyto="__text__"), default=[], description="List of names seen for that file") - size = odm.Optional(odm.Integer(), description="Size of the file in bytes") + size = odm.Optional(odm.long(), description="Size of the file in bytes") type = odm.Optional(odm.Keyword(), description="Type of file as identified by Assemblyline") diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index 23615a4a0..66bceebb3 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -12,7 +12,7 @@ @odm.model(index=True, store=False, description="File Model of Submission") class File(odm.Model): name = odm.Keyword(copyto="__text__", description="Name of the file") - size = odm.Optional(odm.Integer(), description="Size of the file in bytes") + size = odm.Optional(odm.long(), description="Size of the file in bytes") sha256 = odm.SHA256(copyto="__text__", description="SHA256 hash of the file") @@ -142,7 +142,7 @@ class Submission(odm.Model): max_score = odm.Integer(description="Maximum score of all the files in the scan") metadata = odm.FlattenedObject(store=False, description="Metadata associated to the submission") params: SubmissionParams = odm.Compound(SubmissionParams, description="Submission parameter details", ai=False) - results: list[str] = odm.List(odm.Keyword(), store=False, description="List of result keys", ai=False) + results: list[str] = odm.List(odm.wildcard(), store=False, description="List of result keys", ai=False) sid: str = odm.UUID(copyto="__text__", description="Submission ID") state = odm.Enum(values=SUBMISSION_STATES, description="Status of the submission", ai=False) to_be_deleted = odm.Boolean( From a5ad2b4a953bdbaa968293b9c08a6e864341bb78 Mon Sep 17 00:00:00 2001 From: Adam Douglass Date: Thu, 6 Mar 2025 15:46:56 +0000 Subject: [PATCH 7/8] index signature body change metadata indexing --- assemblyline/odm/models/signature.py | 2 +- assemblyline/odm/models/submission.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assemblyline/odm/models/signature.py b/assemblyline/odm/models/signature.py index 341ffed25..923795291 100644 --- a/assemblyline/odm/models/signature.py +++ b/assemblyline/odm/models/signature.py @@ -14,7 +14,7 @@ @odm.model(index=True, store=True) class Signature(odm.Model): classification = odm.Classification(store=True, default=Classification.UNRESTRICTED) - data = odm.Text(index=False, store=False) + data = odm.Text(copyto="__text__", store=False) last_modified = odm.Date(default="NOW") name = odm.Keyword(copyto="__text__") order = odm.Integer(default=1, store=False) diff --git a/assemblyline/odm/models/submission.py b/assemblyline/odm/models/submission.py index 66bceebb3..8f2f6afb4 100644 --- a/assemblyline/odm/models/submission.py +++ b/assemblyline/odm/models/submission.py @@ -140,7 +140,7 @@ class Submission(odm.Model): file_count = odm.Integer(description="Total number of files in the submission", ai=False) files: list[File] = odm.List(odm.Compound(File), description="List of files that were originally submitted") max_score = odm.Integer(description="Maximum score of all the files in the scan") - metadata = odm.FlattenedObject(store=False, description="Metadata associated to the submission") + metadata = odm.Mapping(odm.wildcard(), copyto="__text__", store=False, description="Metadata associated to the submission") params: SubmissionParams = odm.Compound(SubmissionParams, description="Submission parameter details", ai=False) results: list[str] = odm.List(odm.wildcard(), store=False, description="List of result keys", ai=False) sid: str = odm.UUID(copyto="__text__", description="Submission ID") From d522bddfd71b7f982da9b3722010e8eeb00bad48 Mon Sep 17 00:00:00 2001 From: Adam Douglass Date: Thu, 6 Mar 2025 15:53:37 +0000 Subject: [PATCH 8/8] change metadata mapping in alert --- assemblyline/odm/models/alert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assemblyline/odm/models/alert.py b/assemblyline/odm/models/alert.py index 904646cbf..e0763ed6a 100644 --- a/assemblyline/odm/models/alert.py +++ b/assemblyline/odm/models/alert.py @@ -202,7 +202,7 @@ class Alert(odm.Model): filtered = odm.Boolean(default=False, description="Indicates whether portions of the submission's analysis results have been omitted due to the user's classification level not meeting the required threshold for viewing certain data.") heuristic = odm.Compound(Heuristic, description="Data regarding the heuristics that triggered the alert.") label = odm.List(odm.Keyword(), copyto="__text__", default=[], description="Labels assigned to the alert for categorization and filtering.") - metadata = odm.FlattenedObject(default={}, store=False, description="Additional metadata provided with the file at the time of submission.") + metadata = odm.Mapping(odm.wildcard(), copyto="__text__", default={}, store=False, description="Additional metadata provided with the file at the time of submission.") owner = odm.Optional(odm.Keyword(), description="Specifies the user or system component that has taken ownership of the alert. If no user has claimed the alert, it remains under system ownership with no specific user associated, indicated by a value of `None`.") priority = odm.Optional(odm.Enum(values=PRIORITIES), description="Indicates the importance level assigned to the alert.") reporting_ts = odm.Date(description="Timestamp when the alert was created.")