Skip to content

Commit

Permalink
Merge pull request #4 from CybercentreCanada/revert-3-refactor/remove…
Browse files Browse the repository at this point in the history
…_dup

Revert "Removing duplicate method and associated tests"
  • Loading branch information
cccs-kevin authored Apr 29, 2022
2 parents e01520c + f6e195e commit 888a015
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
23 changes: 19 additions & 4 deletions assemblyline_incident_manager/al_incident_submitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from threading import Thread
from queue import Queue

from assemblyline.common.digests import get_sha256_for_file
from assemblyline_client import Client4
from assemblyline_incident_manager.helper import init_logging, print_and_log, validate_parameters, prepare_apikey, safe_str, Client, prepare_query_value

Expand Down Expand Up @@ -48,6 +47,22 @@
total_file_count = 0


def get_id_from_data(file_path: str) -> str:
    """
    This method generates a sha256 hash for the file contents of a file
    @param file_path: The file path
    @return: The hex-encoded sha256 digest of the file contents
    """
    sha256_hash = sha256()
    # Stream the file in 4096-byte chunks so we don't load the whole file in memory
    with open(file_path, 'rb') as f:
        # Two-argument iter() keeps calling f.read(4096) until it returns the
        # b"" sentinel (end of file), so the read call is written only once.
        for data in iter(lambda: f.read(4096), b""):
            sha256_hash.update(data)
    return sha256_hash.hexdigest()


# These are click commands and options which allow the easy handling of command line arguments and flags
@click.group(invoke_without_command=True)
@click.option("--url", required=True, type=click.STRING, help="The target URL that hosts Assemblyline.")
Expand Down Expand Up @@ -267,7 +282,7 @@ def _test_ingest_file(al_client: Client4, settings: dict, incident_num: str, ale
with open(TEST_FILE, "wb") as f:
f.write(file_contents)

sha = get_sha256_for_file(TEST_FILE)
sha = get_id_from_data(TEST_FILE)

# Ingesting the test file
print_and_log(log, f"INGEST,{TEST_FILE} ({sha}) is about to be ingested in test mode.,{TEST_FILE},{sha}", logging.DEBUG)
Expand Down Expand Up @@ -305,7 +320,7 @@ def _get_most_recent_file_path() -> (bool, str):

# This adds the most recent hash that has been ingested to the hash table, so that
# we do not re-ingest it during this run.
sha = get_sha256_for_file(file_path)
sha = get_id_from_data(file_path)
hash_table.append(sha)
return True, file_path

Expand Down Expand Up @@ -347,7 +362,7 @@ def _thr_ingest_file(
return

# Create a sha256 hash using the file contents.
sha = get_sha256_for_file(file_path)
sha = get_id_from_data(file_path)

# If hash has already been submitted, then skip it
if dedup_hashes and sha in hash_table:
Expand Down
13 changes: 13 additions & 0 deletions test/test_al_incident_submitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ def teardown_class(cls):
remove(path.join(TEST_DIR, file))
rmdir(TEST_DIR)

@staticmethod
@pytest.mark.parametrize("data, expected_result", [
    (b"blah", '8b7df143d91c716ecfa5fc1730022f6b421b05cedee8fd52b1fc65a96030ad52')
])
def test_get_id_from_data(data, expected_result):
    """
    Checks that get_id_from_data returns the expected sha256 hex digest
    for a file containing the parametrized bytes
    @param data: The bytes written to the temporary file
    @param expected_result: The sha256 hex digest expected for those bytes
    """
    from os import remove
    from assemblyline_incident_manager.al_incident_submitter import get_id_from_data
    SOME_FILE = "some_file.txt"
    with open(SOME_FILE, "wb") as f:
        # Write the parametrized payload (not a hard-coded literal) so that
        # every parameter set actually exercises its own data.
        f.write(data)
    try:
        assert get_id_from_data(SOME_FILE) == expected_result
    finally:
        # Always clean up, even when the assertion fails
        remove(SOME_FILE)

@staticmethod
@pytest.mark.parametrize("case, command_line_options", [
(
Expand Down

0 comments on commit 888a015

Please sign in to comment.