From 83b01fed393223fe9a2b220dd0a2ec542311df03 Mon Sep 17 00:00:00 2001
From: cccs-kevin
Date: Thu, 26 May 2022 16:30:19 +0000
Subject: [PATCH] Removing quotes around wildcards, adding space to reserved chars

---
 .../al_incident_downloader.py           | 55 +++++++++++++------
 assemblyline_incident_manager/helper.py | 12 ++--
 2 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/assemblyline_incident_manager/al_incident_downloader.py b/assemblyline_incident_manager/al_incident_downloader.py
index 3ceabf7..8860ff2 100755
--- a/assemblyline_incident_manager/al_incident_downloader.py
+++ b/assemblyline_incident_manager/al_incident_downloader.py
@@ -36,14 +36,20 @@
 @click.option("-u", "--username", required=True, type=click.STRING, help="Your Assemblyline account username.")
 @click.option("--apikey", required=True, type=click.Path(exists=True, readable=True), help="A path to a file that contains only your Assemblyline account API key. NOTE that this API key requires read access.")
-@click.option("--max_score", required=True, default=1, type=click.INT, help="The maximum score for files that we want to download from Assemblyline.")
-@click.option("--incident_num", required=True, type=click.STRING, help="The incident number that each file is associated with.")
-@click.option("--download_path", required=True, type=click.Path(exists=False), help="The path to the folder that we will download files to.")
-@click.option("--upload_path", required=True, type=click.Path(exists=False), help="The base path from which the files were ingested from on the compromised system.")
+@click.option("--max_score", required=True, default=1, type=click.INT,
+              help="The maximum score for files that we want to download from Assemblyline.")
+@click.option("--incident_num", required=True, type=click.STRING,
+              help="The incident number that each file is associated with.")
+@click.option("--download_path", required=True, type=click.Path(exists=False),
+              help="The path to the folder that we will download files to.")
+@click.option("--upload_path", required=True, type=click.Path(exists=False),
+              help="The base path from which the files were ingested from on the compromised system.")
 @click.option("-t", "--is_test", is_flag=True, help="A flag that indicates that you're running a test.")
-@click.option("--num_of_downloaders", default=1, type=click.INT, help="The number of threads that will be created to facilitate downloading the files.")
+@click.option("--num_of_downloaders", default=1, type=click.INT,
+              help="The number of threads that will be created to facilitate downloading the files.")
 @click.option("--do_not_verify_ssl", is_flag=True, help="Verify SSL when creating and using the Assemblyline Client.")
-def main(url: str, username: str, apikey: str, max_score: int, incident_num: str, download_path: str, upload_path, is_test: bool, num_of_downloaders: int, do_not_verify_ssl: bool):
+def main(url: str, username: str, apikey: str, max_score: int, incident_num: str, download_path: str, upload_path,
+         is_test: bool, num_of_downloaders: int, do_not_verify_ssl: bool):
     """
     Example: al-incident-downloader --url="https://" --username="" --apikey="/path/to/file/containing/apikey" --incident_num=123 --min_score=100 --download_path=/path/to/where/you/want/downloads --upload_path=/path/from/where/files/were/uploaded/from
     """
 
@@ -51,7 +57,7 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
     # Here is the query that we will be using to retrieve all submission details
     incident_num = prepare_query_value(incident_num)
     prepared_upload_path = prepare_query_value(upload_path)
-    query = f"metadata.incident_number:\"{incident_num}\" AND max_score:<={max_score} AND metadata.filename:\"*{prepared_upload_path}*\""
+    query = f"metadata.incident_number:\"{incident_num}\" AND max_score:<={max_score} AND metadata.filename:*{prepared_upload_path}*"
 
     if is_test:
         print_and_log(log, f"INFO,The query that you will make is: {query}.", logging.DEBUG)
@@ -135,7 +141,10 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
             unique_file_hashes.add(file_hash)
 
             if upload_path not in submitted_filepath:
-                print_and_log(log, f"INFO,{upload_path} is not in {submitted_filepath} for SID {sid} even though it shares the provided incident number {incident_num}.,{submitted_filepath},{file_hash}", log_level=logging.DEBUG)
+                print_and_log(
+                    log,
+                    f"INFO,{upload_path} is not in {submitted_filepath} for SID {sid} even though it shares the provided incident number {incident_num}.,{submitted_filepath},{file_hash}",
+                    log_level=logging.DEBUG)
                 continue
             root_filepath = submitted_filepath.replace(upload_path, "")
             root_filepath = root_filepath.lstrip("\\")
@@ -145,7 +154,10 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
 
             if not overwrite_all and add_unique:
                 if os.path.exists(filepath_to_download):
-                    print_and_log(log, f"INFO,{filepath_to_download} has already been downloaded.,{submitted_filepath},{file_hash}", log_level=logging.DEBUG)
+                    print_and_log(
+                        log,
+                        f"INFO,{filepath_to_download} has already been downloaded.,{submitted_filepath},{file_hash}",
+                        log_level=logging.DEBUG)
                     continue
 
             file_queue.put((file_hash, filepath_to_download))
@@ -162,9 +174,16 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
         worker.join()
 
     print_and_log(log, f"INFO,Download complete!", logging.DEBUG)
-    print_and_log(log, f"INFO,{len(unique_file_paths)} unique file paths found in {total_submissions_that_match_query} submissions that match the query.", logging.DEBUG)
-    print_and_log(log, f"INFO,{len(unique_file_hashes)} files with unique contents found in {total_submissions_that_match_query} submissions that match the query.", logging.DEBUG)
-    print_and_log(log, f"INFO,{total_already_downloaded} files were downloaded to {download_path} in previous runs.", logging.DEBUG)
+    print_and_log(
+        log,
+        f"INFO,{len(unique_file_paths)} unique file paths found in {total_submissions_that_match_query} submissions that match the query.",
+        logging.DEBUG)
+    print_and_log(
+        log,
+        f"INFO,{len(unique_file_hashes)} files with unique contents found in {total_submissions_that_match_query} submissions that match the query.",
+        logging.DEBUG)
+    print_and_log(
+        log, f"INFO,{total_already_downloaded} files were downloaded to {download_path} in previous runs.", logging.DEBUG)
     print_and_log(log, f"INFO,{total_downloaded} files downloaded to {download_path} in current run.", logging.DEBUG)
     print_and_log(log, f"INFO,Total elapsed time: {time() - start_time}.", logging.DEBUG)
     print_and_log(log, "INFO,Thank you for using Assemblyline :)", logging.DEBUG)
@@ -173,16 +192,20 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
 def _handle_overwrite(download_dir: str) -> (bool, bool):
     overwrite_all = False
     add_unique = False
-    overwrite = input(f"The download directory {download_dir} already exists. Do you wish to overwrite all contents? [y/n]:")
+    overwrite = input(
+        f"The download directory {download_dir} already exists. Do you wish to overwrite all contents? [y/n]:")
     if overwrite == "y":
         overwrite_all = True
     elif overwrite == "n":
-        add_missing = input(f"The download directory {download_dir} already exists. Do you wish to download additional files to this directory? [y/n]:")
+        add_missing = input(
+            f"The download directory {download_dir} already exists. Do you wish to download additional files to this directory? [y/n]:")
         if add_missing == "y":
             add_unique = True
         elif add_missing == "n":
-            print_and_log(log, f"INFO,The download directory {download_dir} already exists. You chose not to download additional files and to exit.",
-                          logging.DEBUG)
+            print_and_log(
+                log,
+                f"INFO,The download directory {download_dir} already exists. You chose not to download additional files and to exit.",
+                logging.DEBUG)
         else:
             print_and_log(log, "INFO,You submitted a value that was neither [y/n]. Exiting.", logging.DEBUG)
     else:
diff --git a/assemblyline_incident_manager/helper.py b/assemblyline_incident_manager/helper.py
index c337bbc..a9b71f0 100755
--- a/assemblyline_incident_manager/helper.py
+++ b/assemblyline_incident_manager/helper.py
@@ -14,7 +14,7 @@
 FULL_URI = f"^((?:(?:[A-Za-z]*:)?//)?(?:\\S+(?::\\S*)?@)?(?:{IP_REGEX}|{DOMAIN_REGEX})(?::\\d{{2,5}})?){URI_PATH}?$"
 DEFAULT_SERVICES = ["Static Analysis", "Extraction", "Networking", "Antivirus"]
 
-RESERVED_CHARACTERS = [".", "?", "+", "*", "|", "{", "}", "[", "]", "(", ")", '"', "\\", ":", "/"]
+RESERVED_CHARACTERS = [".", "?", "+", "*", "|", "{", "}", "[", "]", "(", ")", '"', "\\", ":", "/", " "]
 
 _VALID_UTF8 = compile(rb"""((?:
     [\x09\x0a\x20-\x7e]|               # 1-byte (ASCII excluding control chars).
@@ -48,7 +48,8 @@ def __init__(self, log: logging.Logger, url: str, username: str, apikey: str, do
         self.al_client = None
         self._thr_refresh_client(log, url, username, apikey, do_not_verify_ssl)
 
-    def _thr_refresh_client(self, log: logging.Logger, url: str, username: str, apikey: str, do_not_verify_ssl: bool) -> None:
+    def _thr_refresh_client(
+            self, log: logging.Logger, url: str, username: str, apikey: str, do_not_verify_ssl: bool) -> None:
         print_and_log(log, "ADMIN,Refreshing the Assemblyline Client...,,", logging.DEBUG)
         self.al_client = get_client(url, apikey=(username, apikey), verify=not do_not_verify_ssl)
         thr = Timer(1800, self._thr_refresh_client, (log, url, username, apikey, do_not_verify_ssl))
@@ -77,7 +78,8 @@ def _validate_service_selection(log: logging.Logger, service_selection: str) ->
     services_selected = service_selection.split(",")
     for service_selected in services_selected:
         if not service_selected:
-            print_and_log(log, f"ADMIN,Invalid service selected {service_selected} of {services_selected},,", logging.ERROR)
+            print_and_log(
+                log, f"ADMIN,Invalid service selected {service_selected} of {services_selected},,", logging.ERROR)
             return []
     return services_selected
 
@@ -141,5 +143,7 @@ def _escape(t, reversible=True):
 
 def prepare_query_value(query_value: str) -> str:
     if any(reserved_char in query_value for reserved_char in RESERVED_CHARACTERS):
-        query_value = query_value.translate(str.maketrans({reserved_char: f"\\{reserved_char}" for reserved_char in RESERVED_CHARACTERS}))
+        query_value = query_value.translate(str.maketrans(
+            {reserved_char: f"\\{reserved_char}"
+             for reserved_char in RESERVED_CHARACTERS}))
     return query_value
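
Reviewer note (not part of the patch): the two functional changes above work together. prepare_query_value() now backslash-escapes spaces along with the other reserved characters, which is what lets al_incident_downloader drop the quotes around the metadata.filename wildcard clause; in Lucene-style query syntax a "*" inside a quoted phrase is generally treated as a literal character rather than a wildcard, so the wildcard has to sit outside the quotes while the path itself is escaped character by character. Below is a minimal, self-contained sketch of that behaviour. The two definitions mirror the patched helper.py; the sample path and the __main__ harness are illustrative assumptions only, not code from the repository.

# Minimal sketch, assuming the patched helper.py definitions shown above.
RESERVED_CHARACTERS = [".", "?", "+", "*", "|", "{", "}", "[", "]", "(", ")", '"', "\\", ":", "/", " "]


def prepare_query_value(query_value: str) -> str:
    # Backslash-escape every reserved character (now including spaces) so the
    # value can be dropped into the query without surrounding quotes.
    if any(reserved_char in query_value for reserved_char in RESERVED_CHARACTERS):
        query_value = query_value.translate(str.maketrans(
            {reserved_char: f"\\{reserved_char}" for reserved_char in RESERVED_CHARACTERS}))
    return query_value


if __name__ == "__main__":
    # Hypothetical upload path, used only to show the escaping.
    prepared_upload_path = prepare_query_value("C:\\Program Files\\evidence")
    # The wildcards stay outside the escaped value and unquoted, so they are
    # still interpreted as wildcards rather than literal asterisks:
    print(f"metadata.filename:*{prepared_upload_path}*")
    # -> metadata.filename:*C\:\\Program\ Files\\evidence*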