From 83b01fed393223fe9a2b220dd0a2ec542311df03 Mon Sep 17 00:00:00 2001
From: cccs-kevin
Date: Thu, 26 May 2022 16:30:19 +0000
Subject: [PATCH] Removing quotes around wildcards, adding space to reserved chars

---
 .../al_incident_downloader.py           | 55 +++++++++++++------
 assemblyline_incident_manager/helper.py | 12 ++--
 2 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/assemblyline_incident_manager/al_incident_downloader.py b/assemblyline_incident_manager/al_incident_downloader.py
index 3ceabf7..8860ff2 100755
--- a/assemblyline_incident_manager/al_incident_downloader.py
+++ b/assemblyline_incident_manager/al_incident_downloader.py
@@ -36,14 +36,20 @@
 @click.option("-u", "--username", required=True, type=click.STRING, help="Your Assemblyline account username.")
 @click.option("--apikey", required=True, type=click.Path(exists=True, readable=True), help="A path to a file that contains only your Assemblyline account API key. NOTE that this API key requires read access.")
-@click.option("--max_score", required=True, default=1, type=click.INT, help="The maximum score for files that we want to download from Assemblyline.")
-@click.option("--incident_num", required=True, type=click.STRING, help="The incident number that each file is associated with.")
-@click.option("--download_path", required=True, type=click.Path(exists=False), help="The path to the folder that we will download files to.")
-@click.option("--upload_path", required=True, type=click.Path(exists=False), help="The base path from which the files were ingested from on the compromised system.")
+@click.option("--max_score", required=True, default=1, type=click.INT,
+              help="The maximum score for files that we want to download from Assemblyline.")
+@click.option("--incident_num", required=True, type=click.STRING,
+              help="The incident number that each file is associated with.")
+@click.option("--download_path", required=True, type=click.Path(exists=False),
+              help="The path to the folder that we will download files to.")
+@click.option("--upload_path", required=True, type=click.Path(exists=False),
+              help="The base path from which the files were ingested from on the compromised system.")
 @click.option("-t", "--is_test", is_flag=True, help="A flag that indicates that you're running a test.")
-@click.option("--num_of_downloaders", default=1, type=click.INT, help="The number of threads that will be created to facilitate downloading the files.")
+@click.option("--num_of_downloaders", default=1, type=click.INT,
+              help="The number of threads that will be created to facilitate downloading the files.")
 @click.option("--do_not_verify_ssl", is_flag=True, help="Verify SSL when creating and using the Assemblyline Client.")
-def main(url: str, username: str, apikey: str, max_score: int, incident_num: str, download_path: str, upload_path, is_test: bool, num_of_downloaders: int, do_not_verify_ssl: bool):
+def main(url: str, username: str, apikey: str, max_score: int, incident_num: str, download_path: str, upload_path,
+         is_test: bool, num_of_downloaders: int, do_not_verify_ssl: bool):
     """
     Example: al-incident-downloader --url="https://" --username="" --apikey="/path/to/file/containing/apikey" --incident_num=123 --min_score=100 --download_path=/path/to/where/you/want/downloads --upload_path=/path/from/where/files/were/uploaded/from
     """
 
@@ -51,7 +57,7 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
     # Here is the query that we will be using to retrieve all submission details
     incident_num = prepare_query_value(incident_num)
     prepared_upload_path = prepare_query_value(upload_path)
-    query = f"metadata.incident_number:\"{incident_num}\" AND max_score:<={max_score} AND metadata.filename:\"*{prepared_upload_path}*\""
+    query = f"metadata.incident_number:\"{incident_num}\" AND max_score:<={max_score} AND metadata.filename:*{prepared_upload_path}*"
 
     if is_test:
         print_and_log(log, f"INFO,The query that you will make is: {query}.", logging.DEBUG)
@@ -135,7 +141,10 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
             unique_file_hashes.add(file_hash)
 
             if upload_path not in submitted_filepath:
-                print_and_log(log, f"INFO,{upload_path} is not in {submitted_filepath} for SID {sid} even though it shares the provided incident number {incident_num}.,{submitted_filepath},{file_hash}", log_level=logging.DEBUG)
+                print_and_log(
+                    log,
+                    f"INFO,{upload_path} is not in {submitted_filepath} for SID {sid} even though it shares the provided incident number {incident_num}.,{submitted_filepath},{file_hash}",
+                    log_level=logging.DEBUG)
                 continue
             root_filepath = submitted_filepath.replace(upload_path, "")
             root_filepath = root_filepath.lstrip("\\")
@@ -145,7 +154,10 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
 
             if not overwrite_all and add_unique:
                 if os.path.exists(filepath_to_download):
-                    print_and_log(log, f"INFO,{filepath_to_download} has already been downloaded.,{submitted_filepath},{file_hash}", log_level=logging.DEBUG)
+                    print_and_log(
+                        log,
+                        f"INFO,{filepath_to_download} has already been downloaded.,{submitted_filepath},{file_hash}",
+                        log_level=logging.DEBUG)
                     continue
 
             file_queue.put((file_hash, filepath_to_download))
@@ -162,9 +174,16 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
         worker.join()
 
     print_and_log(log, f"INFO,Download complete!", logging.DEBUG)
-    print_and_log(log, f"INFO,{len(unique_file_paths)} unique file paths found in {total_submissions_that_match_query} submissions that match the query.", logging.DEBUG)
-    print_and_log(log, f"INFO,{len(unique_file_hashes)} files with unique contents found in {total_submissions_that_match_query} submissions that match the query.", logging.DEBUG)
-    print_and_log(log, f"INFO,{total_already_downloaded} files were downloaded to {download_path} in previous runs.", logging.DEBUG)
+    print_and_log(
+        log,
+        f"INFO,{len(unique_file_paths)} unique file paths found in {total_submissions_that_match_query} submissions that match the query.",
+        logging.DEBUG)
+    print_and_log(
+        log,
+        f"INFO,{len(unique_file_hashes)} files with unique contents found in {total_submissions_that_match_query} submissions that match the query.",
+        logging.DEBUG)
+    print_and_log(
+        log, f"INFO,{total_already_downloaded} files were downloaded to {download_path} in previous runs.", logging.DEBUG)
     print_and_log(log, f"INFO,{total_downloaded} files downloaded to {download_path} in current run.", logging.DEBUG)
     print_and_log(log, f"INFO,Total elapsed time: {time() - start_time}.", logging.DEBUG)
     print_and_log(log, "INFO,Thank you for using Assemblyline :)", logging.DEBUG)
@@ -173,16 +192,20 @@ def main(url: str, username: str, apikey: str, max_score: int, incident_num: str
 def _handle_overwrite(download_dir: str) -> (bool, bool):
     overwrite_all = False
     add_unique = False
-    overwrite = input(f"The download directory {download_dir} already exists. Do you wish to overwrite all contents? [y/n]:")
+    overwrite = input(
+        f"The download directory {download_dir} already exists. Do you wish to overwrite all contents? [y/n]:")
     if overwrite == "y":
         overwrite_all = True
     elif overwrite == "n":
-        add_missing = input(f"The download directory {download_dir} already exists. Do you wish to download additional files to this directory? [y/n]:")
+        add_missing = input(
+            f"The download directory {download_dir} already exists. Do you wish to download additional files to this directory? [y/n]:")
         if add_missing == "y":
             add_unique = True
         elif add_missing == "n":
-            print_and_log(log, f"INFO,The download directory {download_dir} already exists. You chose not to download additional files and to exit.",
-                          logging.DEBUG)
+            print_and_log(
+                log,
+                f"INFO,The download directory {download_dir} already exists. You chose not to download additional files and to exit.",
+                logging.DEBUG)
         else:
             print_and_log(log, "INFO,You submitted a value that was neither [y/n]. Exiting.", logging.DEBUG)
     else:
diff --git a/assemblyline_incident_manager/helper.py b/assemblyline_incident_manager/helper.py
index c337bbc..a9b71f0 100755
--- a/assemblyline_incident_manager/helper.py
+++ b/assemblyline_incident_manager/helper.py
@@ -14,7 +14,7 @@
 FULL_URI = f"^((?:(?:[A-Za-z]*:)?//)?(?:\\S+(?::\\S*)?@)?(?:{IP_REGEX}|{DOMAIN_REGEX})(?::\\d{{2,5}})?){URI_PATH}?$"
 DEFAULT_SERVICES = ["Static Analysis", "Extraction", "Networking", "Antivirus"]
 
-RESERVED_CHARACTERS = [".", "?", "+", "*", "|", "{", "}", "[", "]", "(", ")", '"', "\\", ":", "/"]
+RESERVED_CHARACTERS = [".", "?", "+", "*", "|", "{", "}", "[", "]", "(", ")", '"', "\\", ":", "/", " "]
 
 _VALID_UTF8 = compile(rb"""((?:
     [\x09\x0a\x20-\x7e]|               # 1-byte (ASCII excluding control chars).
@@ -48,7 +48,8 @@ def __init__(self, log: logging.Logger, url: str, username: str, apikey: str, do
         self.al_client = None
         self._thr_refresh_client(log, url, username, apikey, do_not_verify_ssl)
 
-    def _thr_refresh_client(self, log: logging.Logger, url: str, username: str, apikey: str, do_not_verify_ssl: bool) -> None:
+    def _thr_refresh_client(
+            self, log: logging.Logger, url: str, username: str, apikey: str, do_not_verify_ssl: bool) -> None:
         print_and_log(log, "ADMIN,Refreshing the Assemblyline Client...,,", logging.DEBUG)
         self.al_client = get_client(url, apikey=(username, apikey), verify=not do_not_verify_ssl)
         thr = Timer(1800, self._thr_refresh_client, (log, url, username, apikey, do_not_verify_ssl))
@@ -77,7 +78,8 @@ def _validate_service_selection(log: logging.Logger, service_selection: str) ->
     services_selected = service_selection.split(",")
     for service_selected in services_selected:
         if not service_selected:
-            print_and_log(log, f"ADMIN,Invalid service selected {service_selected} of {services_selected},,", logging.ERROR)
+            print_and_log(
+                log, f"ADMIN,Invalid service selected {service_selected} of {services_selected},,", logging.ERROR)
             return []
     return services_selected
 
@@ -141,5 +143,7 @@ def _escape(t, reversible=True):
 
 def prepare_query_value(query_value: str) -> str:
     if any(reserved_char in query_value for reserved_char in RESERVED_CHARACTERS):
-        query_value = query_value.translate(str.maketrans({reserved_char: f"\\{reserved_char}" for reserved_char in RESERVED_CHARACTERS}))
+        query_value = query_value.translate(str.maketrans(
+            {reserved_char: f"\\{reserved_char}"
+             for reserved_char in RESERVED_CHARACTERS}))
     return query_value
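
Reviewer note (not part of the patch): the two functional changes above work together. prepare_query_value() now backslash-escapes spaces along with the other reserved characters, which is what lets al_incident_downloader drop the quotes around the metadata.filename wildcard clause; in Lucene-style query syntax a "*" inside a quoted phrase is generally treated as a literal character rather than a wildcard, so the wildcard has to sit outside the quotes while the path itself is escaped character by character. Below is a minimal, self-contained sketch of that behaviour. The two definitions mirror the patched helper.py; the sample path and the __main__ harness are illustrative assumptions only, not code from the repository.

# Minimal sketch, assuming the patched helper.py definitions shown above.
RESERVED_CHARACTERS = [".", "?", "+", "*", "|", "{", "}", "[", "]", "(", ")", '"', "\\", ":", "/", " "]


def prepare_query_value(query_value: str) -> str:
    # Backslash-escape every reserved character (now including spaces) so the
    # value can be dropped into the query without surrounding quotes.
    if any(reserved_char in query_value for reserved_char in RESERVED_CHARACTERS):
        query_value = query_value.translate(str.maketrans(
            {reserved_char: f"\\{reserved_char}" for reserved_char in RESERVED_CHARACTERS}))
    return query_value


if __name__ == "__main__":
    # Hypothetical upload path, used only to show the escaping.
    prepared_upload_path = prepare_query_value("C:\\Program Files\\evidence")
    # The wildcards stay outside the escaped value and unquoted, so they are
    # still interpreted as wildcards rather than literal asterisks:
    print(f"metadata.filename:*{prepared_upload_path}*")
    # -> metadata.filename:*C\:\\Program\ Files\\evidence*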