Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bugfixes for the ODM #1886

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 42 additions & 20 deletions assemblyline/datastore/support/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
back_mapping.update({x: Keyword for x in set(__analyzer_mapping.values())})


def build_mapping(field_data, prefix=None, allow_refuse_implicit=True):
def build_mapping(field_data, prefix=None, allow_refuse_implicit=True, default_copyto=None):
"""
The mapping for Elasticsearch based on a python model object.
"""
Expand All @@ -72,6 +72,8 @@ def set_mapping(temp_field: _Field, body):
if temp_field.copyto:
assert len(temp_field.copyto) == 1
body['copy_to'] = temp_field.copyto[0]
elif default_copyto is not None and default_copyto:
body['copy_to'] = default_copyto[0]

return body

Expand Down Expand Up @@ -127,13 +129,14 @@ def set_mapping(temp_field: _Field, body):
})

elif isinstance(field, FlattenedObject):
if not field.index or isinstance(field.child_type, Any):
if not any_indexed_part(field) or isinstance(field.child_type, Any):
mappings[name.strip(".")] = {"type": "object", "enabled": False}
else:
dynamic.extend(build_templates(f'{name}.*', field.child_type, nested_template=True, index=field.index))

elif isinstance(field, List):
temp_mappings, temp_dynamic = build_mapping([field.child_type], prefix=path,
default_copyto=field.copyto,
allow_refuse_implicit=False)
mappings.update(temp_mappings)
dynamic.extend(temp_dynamic)
Expand All @@ -151,7 +154,7 @@ def set_mapping(temp_field: _Field, body):
dynamic.extend(temp_dynamic)

elif isinstance(field, Mapping):
if not field.index or isinstance(field.child_type, Any):
if not any_indexed_part(field) or isinstance(field.child_type, Any):
mappings[name.strip(".")] = {"type": "object", "enabled": False}
else:
dynamic.extend(build_templates(f'{name}.*', field.child_type, index=field.index))
Expand Down Expand Up @@ -186,6 +189,25 @@ def set_mapping(temp_field: _Field, body):
return mappings, dynamic


def any_indexed_part(field) -> bool:
    """Report whether this field, or any component nested inside it, is indexed.

    An explicit ``index`` setting (anything other than ``None``) on a wrapper
    or compound field is taken as the answer. When it is unset, the decision
    is delegated to the wrapped child type, or to the compound's subfields.

    :param field: the ODM field object to inspect.
    :return: ``True`` if the field or one of its parts is indexed, else
        ``False``. Always a real ``bool``: an unset (``None``) index on a
        leaf field is reported as ``False`` rather than leaking ``None``.
    """
    if isinstance(field, (FlattenedObject, List, Optional, Mapping)):
        if field.index is not None:
            return bool(field.index)
        # No explicit setting on the wrapper: defer to what it wraps.
        return any_indexed_part(field.child_type)

    if isinstance(field, Compound):
        if field.index is not None:
            return bool(field.index)
        # No explicit setting on the compound: one indexed subfield is enough.
        return any(any_indexed_part(subfield) for subfield in field.fields().values())

    # Leaf field: coerce so callers get the bool the signature promises.
    return bool(field.index)


def build_templates(name, field, nested_template=False, index=True) -> list:
if isinstance(field, (Keyword, Boolean, Integer, Float, Text, Json)):
if nested_template:
Expand All @@ -212,29 +234,16 @@ def build_templates(name, field, nested_template=False, index=True) -> list:

return [{f"{name}_tpl": field_template}]

elif isinstance(field, Any) or not index:
field_template = {
"path_match": name,
"mapping": {
"type": "keyword",
"index": False
}
}

if field.index:
raise ValueError(f"Mapping to Any may not be indexed: {name}")
return [{f"{name}_tpl": field_template}]

elif isinstance(field, (Mapping, List)):
temp_name = name
if field.name:
temp_name = f"{name}.{field.name}"
# if field.name:
# temp_name = f"{name}.{field.name}"
return build_templates(temp_name, field.child_type, nested_template=True)

elif isinstance(field, Compound):
temp_name = name
if field.name:
temp_name = f"{name}.{field.name}"
# if field.name:
# temp_name = f"{name}.{field.name}"

out = []
for sub_name, sub_field in field.fields().items():
Expand All @@ -246,5 +255,18 @@ def build_templates(name, field, nested_template=False, index=True) -> list:
elif isinstance(field, Optional):
return build_templates(name, field.child_type, nested_template=nested_template)

elif isinstance(field, Any) or not index:
field_template = {
"path_match": name,
"mapping": {
"type": "keyword",
"index": False
}
}

if field.index:
raise ValueError(f"Mapping to Any may not be indexed: {name}")
return [{f"{name}_tpl": field_template}]

else:
raise NotImplementedError(f"Unknown type for elasticsearch dynamic mapping: {field.__class__}")
13 changes: 13 additions & 0 deletions assemblyline/odm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,16 @@ def apply_defaults(self, index, store):
if self.store is None:
self.store = store

def inherit_parameters(self, other):
    """Fill in this field's unset parameters from ``other``.

    ``index``, ``store`` and ``ai`` are taken from ``other`` only while they
    are still ``None`` on this field. ``copyto`` is taken from ``other`` only
    when this field's own ``copyto`` is empty. Anything already set here is
    left untouched.

    :param other: the field whose parameters act as fallbacks.
    """
    for attr in ("index", "store", "ai"):
        if getattr(self, attr) is None:
            setattr(self, attr, getattr(other, attr))

    # copyto is a sized collection, so "unset" means empty rather than None.
    # The collection is shared with ``other`` by reference, not copied.
    if not len(self.copyto):
        self.copyto = other.copyto

def fields(self):
"""
Return the subfields/modified field data.
Expand Down Expand Up @@ -863,6 +873,7 @@ def __init__(self, child_type, auto=False, **kwargs):
super().__init__(**kwargs)
self.child_type = child_type
self.auto = auto
self.inherit_parameters(self.child_type)

def check(self, value, **kwargs):
if self.optional and value is None:
Expand Down Expand Up @@ -957,6 +968,7 @@ def __init__(self, child_type, **kwargs):

super().__init__(**kwargs)
self.child_type = child_type
self.inherit_parameters(child_type)

def check(self, value, **kwargs):
if self.optional and value is None:
Expand Down Expand Up @@ -1056,6 +1068,7 @@ def __init__(self, child_type, **kwargs):
self.default_set = True
child_type.optional = True
self.child_type = child_type
self.inherit_parameters(child_type)

def check(self, value, *args, **kwargs):
if value is None:
Expand Down
2 changes: 2 additions & 0 deletions test/test_odm.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,3 +899,5 @@ class Outer(Model):

assert {k: v.multivalued for k, v in Outer.flat_fields(show_compound=True, skip_mappings=True).items()} == {
"c": True, "c.a": False, "c.b": False}


190 changes: 190 additions & 0 deletions test/test_odm_build_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
from assemblyline import odm
from assemblyline.datastore.support.build import build_mapping


def test_simple_fields():
    """Check the static mapping built for nested compounds with mixed index settings."""

    @odm.model()
    class Inner(odm.Model):
        text = odm.Text()
        key = odm.keyword()
        index_key = odm.keyword(index=True)
        no_index_key = odm.keyword(index=False)

    @odm.model(index=True)
    class NestAlwaysIndex(odm.Model):
        a = odm.compound(Inner)
        b = odm.compound(Inner, index=True)
        c = odm.compound(Inner, index=False)

    @odm.model()
    class Outer(odm.Model):
        a = odm.compound(Inner)
        b = odm.compound(Inner, index=True)
        c = odm.compound(Inner, index=False)
        d = odm.compound(NestAlwaysIndex)
        e = odm.compound(NestAlwaysIndex, index=False)

    # Build the mappings
    static, dynamic = build_mapping(Outer.fields().values())

    def leaves(inherited):
        # Expected (name, type, index) rows for one copy of Inner. The two
        # fields declared without an index take the value given by their
        # surroundings; the two with an explicit index keep their own.
        return [
            ("text", "text", inherited),
            ("key", "keyword", inherited),
            ("index_key", "keyword", True),
            ("no_index_key", "keyword", False),
        ]

    # Expected static fields, keyed by the dotted prefix of each Inner copy
    expected = {
        'a.': leaves(None),
        'b.': leaves(True),
        'c.': leaves(False),
        'd.a.': leaves(True),
        'd.b.': leaves(True),
        'd.c.': leaves(False),
        'e.a.': leaves(True),
        'e.b.': leaves(True),
        'e.c.': leaves(False),
    }

    # Pop every expected entry so that anything left over is an unexpected one
    for prefix, leaf_specs in expected.items():
        for leaf, es_type, indexed in leaf_specs:
            path = prefix + leaf
            mapping = static.pop(path)
            assert mapping['type'] == es_type
            assert mapping['index'] is indexed, path
    assert len(static) == 0

    # The only dynamic entry should be the rule refusing implicit mappings
    assert len(dynamic) == 1
    assert list(dynamic[0].keys()) == ['refuse_all_implicit_mappings']


def test_dynamic_fields():
    """Check which mappings are disabled statically and which get dynamic templates."""

    @odm.model()
    class Inner(odm.Model):
        text = odm.Text()
        key = odm.keyword()
        index_key = odm.keyword(index=True)
        no_index_key = odm.keyword(index=False)

    @odm.model(index=True)
    class NestAlwaysIndex(odm.Model):
        a = odm.compound(Inner)
        b = odm.compound(Inner, index=True)
        c = odm.compound(Inner, index=False)

    # Only mappings where the mapping itself is marked for indexing will have its subfields indexed
    @odm.model(index=True)
    class Outer(odm.Model):
        a = odm.mapping(odm.compound(Inner), index=False)
        b = odm.mapping(odm.compound(Inner, index=True))
        c = odm.mapping(odm.compound(Inner, index=False))
        d = odm.mapping(odm.compound(NestAlwaysIndex), index=False)
        e = odm.mapping(odm.compound(NestAlwaysIndex, index=False))
        f = odm.mapping(odm.integer())

    # Build the mappings
    static, dynamic = build_mapping(Outer.fields().values())

    # The non-indexed mappings must appear as disabled objects, and nothing else may be static
    for disabled in ('a', 'c', 'd', 'e'):
        entry = static.pop(disabled)
        assert entry['enabled'] is False
        assert entry['type'] == 'object'
    assert len(static) == 0, static

    # Flatten the list of single-rule dicts, checking no rule name appears twice
    collected = {}
    for template in dynamic:
        for rule_name, rule_body in template.items():
            assert rule_name not in collected
            collected[rule_name] = rule_body

    def rule(path, es_type, indexed):
        # Body of the dynamic template expected for one path
        return {'mapping': {'index': indexed, 'type': es_type}, 'path_match': path}

    assert collected == {
        'b.*.text_tpl': rule('b.*.text', 'text', True),
        'b.*.key_tpl': rule('b.*.key', 'keyword', True),
        'b.*.index_key_tpl': rule('b.*.index_key', 'keyword', True),
        'b.*.no_index_key_tpl': rule('b.*.no_index_key', 'keyword', False),
        'f.*_tpl': rule('f.*', 'integer', True),
    }


def test_dynamic_fields_simple():
    """Simplified variant of test_dynamic_fields covering some more common cases."""

    @odm.model()
    class InnerNone(odm.Model):
        a = odm.keyword()
        b = odm.keyword(index=True)

    @odm.model(index=True)
    class InnerTrue(odm.Model):
        a = odm.keyword()
        b = odm.keyword(index=True)

    @odm.model()
    class Outer(odm.Model):
        a = odm.mapping(odm.compound(InnerNone), index=True)
        b = odm.mapping(odm.compound(InnerNone, index=True))
        c = odm.mapping(odm.compound(InnerNone))
        d = odm.mapping(odm.compound(InnerTrue))

    # Build the mappings
    static, dynamic = build_mapping(Outer.fields().values())

    # There shouldn't be any static mappings
    assert len(static) == 0

    # Flatten the list of single-rule dicts, checking no rule name appears twice
    collected = {}
    for template in dynamic:
        for rule_name, rule_body in template.items():
            assert rule_name not in collected
            collected[rule_name] = rule_body

    def keyword_rule(path, indexed):
        # Body of the dynamic template expected for one keyword path
        return {'mapping': {'type': 'keyword', 'index': indexed}, 'path_match': path}

    assert collected == {
        'a.*.a_tpl': keyword_rule('a.*.a', True),
        'a.*.b_tpl': keyword_rule('a.*.b', True),
        'b.*.a_tpl': keyword_rule('b.*.a', True),
        'b.*.b_tpl': keyword_rule('b.*.b', True),
        # All the same except this one
        'c.*.a_tpl': keyword_rule('c.*.a', None),
        'c.*.b_tpl': keyword_rule('c.*.b', True),
        'd.*.a_tpl': keyword_rule('d.*.a', True),
        'd.*.b_tpl': keyword_rule('d.*.b', True),
    }