Merge branch 'feature/jupyter-analyzer' of https://github.com/LetMeR0…

…0t/Cortex-Analyzers into LetMeR00t-feature/jupyter-analyzer
TheHive-Project · Aug 16, 2023 · 787e62a · 787e62a
2 parents 9d1b597 + 4f484f3
commit 787e62a
Show file tree

Hide file tree

Showing 8 changed files with 113 additions and 52 deletions.
diff --git a/analyzers/Jupyter_Analyzer/Jupyter_Run_Notebook_Analyzer.json b/analyzers/Jupyter_Analyzer/Jupyter_Run_Notebook_Analyzer.json
@@ -91,7 +91,7 @@
     },
     {
       "name": "output_folder",
-      "description": "[OUTPUT] Folder path in which executed notebooks will be stored",
+      "description": "[OUTPUT] Folder path in which executed notebooks will be stored. This field supports datetime format codes (see the 'strftime' function).",
       "type": "string",
       "multi": false,
       "required": true,

diff --git a/analyzers/Jupyter_Analyzer/README.md b/analyzers/Jupyter_Analyzer/README.md
@@ -129,10 +129,10 @@ Here is the description for each parameter:
  - `input_handler_http_is_jupyterhub`: [INPUT][HTTP Handler] If you want to use the REST API to get the input notebook, you must indicate if you're behind a JupyterHub instance or not, otherwise don't take this parameter into account (Default: true)
  - `input_handler_http_execute_remotely`: [INPUT][HTTP Handler] If you want to use the REST API to get the input notebook, you must indicate if you want to run your code locally (papermill) or remotely (websocket through HTTP), otherwise don't take this parameter into account
  - `input_paths`: [INPUT] List of paths of the notebooks you want to run
- output_hostname: [OUTPUT] Hostname representing the Jupyter(Hub) instance (or Azure, S3 etc location) to reach to store the output notebook. See https://github.com/nteract/papermill#supported-name-handlers for more information. **Input paths must start with a "/"**.
+ - `output_hostname`: [OUTPUT] Hostname representing the Jupyter(Hub) instance (or Azure, S3 etc location) to reach to store the output notebook. See https://github.com/nteract/papermill#supported-name-handlers for more information.
  - `output_handler_http_service_api_token`: [HTTP Handler] If you want to use the REST API to store the output notebook, you must indicate an API token used by a dedicated service, otherwise don't take this parameter into account
  - `output_handler_http_is_jupyterhub`: [OUTPUT][HTTP Handler] If you want to use the REST API to store the output notebook, you must indicate if you're behind a JupyterHub instance or not, otherwise don't take this parameter into account (Default: true)
- - `output_folder`: [OUTPUT] Folder path in which executed notebooks will be stored. **Output folder must start and end with a "/"**. (Default: /)
+ - `output_folder`: [OUTPUT] Folder path in which executed notebooks will be stored. This field supports datetime format codes such as those used by the `strftime()` function.
  - `any_handler_http_user`: [ANY][HTTP Handler] If you want to use the REST API directly (HTTP handler), you must indicate which user will be used as the reference for having the original notebooks, otherwise don't take this parameter into account.
  - `any_generate_html`: [ANY] Indicates if you want only the HTML conversion as a response (not the full detailed payload) (Default: true)
 
@@ -210,4 +210,8 @@ root#> su cortex
 cortex#> ipython kernel install --name "python3" --user
  ```
 
+## I have some trouble with the library Papermill and more precisely on the file `papermill/iorw.py`
+
+If you're using an input or output hostname starting with "http(s)", please check that you applied the patch mentioned above as expected. Otherwise, please raise an issue.
+
 You can reach the developer directly by email: [email protected]
diff --git a/analyzers/Jupyter_Analyzer/assets/screenshot_cortex_analyzer_settings_example.png b/analyzers/Jupyter_Analyzer/assets/screenshot_cortex_analyzer_settings_example.png
diff --git a/analyzers/Jupyter_Analyzer/jupyter.py b/analyzers/Jupyter_Analyzer/jupyter.py
@@ -23,6 +23,16 @@ def __init__(self):
         """Initialize the Jupyter analyzer"""
         Analyzer.__init__(self)
 
+        # Initialize the output folder
+        self.output_folder = self.get_param(
+            "config.output_folder",
+            None,
+            "You must provide an output folder path in which executed notebooks will be stored",
+        )
+        today = datetime.date.today()
+        # Parse the output folder to catch datetime masks
+        self.output_folder = datetime.datetime.strftime(today,self.output_folder)
+
         # Initialize configuration objects for notebooks
         self.input_configuration = self.initialize_path(
             hostname=self.get_param(
@@ -65,11 +75,6 @@ def __init__(self):
                 "[HTTP Handler only] JupyterHub for output notebook boolean parameter is missing.",
             ),
         )
-        self.output_folder = self.get_param(
-            "config.output_folder",
-            None,
-            "You must provide an output folder path in which executed notebooks will be stored",
-        )
 
         # Additional parameters
         self.any_generate_html = self.get_param(
@@ -166,6 +171,9 @@ def initialize_path(
                     "server_uri_http_api_kernels"
                 ].replace("http://", "ws://")
 
+            # Initialize the output path
+            self.create_output_path(hostname=result["server_uri_http_api_contents"],headers=result["handler_http_headers"])
+
         return result
 
     def get_conf(self, type, name):
@@ -313,6 +321,31 @@ def stop_server(self, hub_url, user, server_name=""):
             # wait to poll again
             time.sleep(1)
 
+    def create_output_path(self, hostname, headers=None):
+        """This function is used to create the output path subfolders if they aren't existing yet
+
+        Args:
+            hostname (str): URL of the hostname
+            headers (dict, optional): Headers for the request. Defaults to None.
+        """
+        subpaths = self.output_folder.split("/")
+        new_path = ""
+        for sp in subpaths:
+            new_path += "/{0}".format(sp)
+            # Build the path
+            final_path = "{0}{1}".format(
+                hostname,
+                new_path
+            )
+
+            # Check if folder is existing
+            status_code = requests.get(final_path, headers=headers).status_code
+            # If the folder exists, it will return a status code 200. Otherwise, we will need to create
+            if status_code != 200:
+                # Create the folder
+                requests.put(final_path, json={"name": sp, "type": "directory"}, headers=headers)
+
+
     @gen.coroutine
     def execute_notebook_remotely(self):
         """This function is used to execute a notebook thanks to a remote Jupyter instance using the REST API and a dedicated websocket
@@ -446,29 +479,26 @@ def execute_notebook_remotely(self):
             # Write the notebook after execution
             # Build output path notebook
             if self.get_conf("output", "handler_type") is HttpHandler:
-                output_path = "{0}{1}{2}-{3}-{4}?token={5}".format(
+                output_path = "{0}/{1}/{2}-{3}?token={4}".format(
                     self.get_conf("output", "server_uri_http_api_contents"),
                     self.output_folder,
-                    str(datetime.date.today()),
                     self.id,
-                    path[1:],
+                    path,
                     self.get_conf("output", "handler_http_service_api_token"),
                 )
-                output_path_direct = "{0}/user/{1}/tree{2}{3}-{4}-{5}".format(
+                output_path_direct = "{0}/user/{1}/tree/{2}/{3}-{4}".format(
                     self.get_conf("output", "hostname"),
                     self.get_conf("output", "handler_http_user"),
                     self.output_folder,
-                    str(datetime.date.today()),
                     self.id,
-                    path[1:],
+                    path,
                 )
             else:
-                output_path = "{0}{1}{2}-{3}-".format(
+                output_path = "{0}/{1}/{2}".format(
                     self.get_conf("output", "hostname"),
                     self.output_folder,
-                    str(datetime.date.today()),
                     self.id,
-                    path[1:],
+                    path,
                 )
                 output_path_direct = output_path
             pm.iorw.write_ipynb(nb_output, output_path)
@@ -523,29 +553,26 @@ def run(self):
 
                 # Build output path notebook
                 if self.get_conf("output", "handler_type") is HttpHandler:
-                    output_notebook = "{0}{1}{2}-{3}-{4}?token={5}".format(
+                    output_notebook = "{0}/{1}/{2}-{3}?token={4}".format(
                         self.get_conf("output", "server_uri_http_api_contents"),
                         self.output_folder,
-                        str(datetime.date.today()),
                         self.id,
-                        path[1:],
+                        path,
                         self.get_conf("output", "handler_http_service_api_token"),
                     )
-                    output_notebook_direct = "{0}/user/{1}/tree{2}{3}-{4}-{5}".format(
+                    output_notebook_direct = "{0}/user/{1}/tree/{2}/{3}-{4}".format(
                         self.get_conf("output", "hostname"),
                         self.get_conf("output", "handler_http_user"),
                         self.output_folder,
-                        str(datetime.date.today()),
                         self.id,
-                        path[1:],
+                        path,
                     )
                 else:
-                    output_notebook = "{0}{1}{2}-{3}-".format(
+                    output_notebook = "{0}/{1}/{2}-".format(
                         self.get_conf("output", "hostname"),
                         self.output_folder,
-                        str(datetime.date.today()),
                         self.id,
-                        path[1:],
+                        path,
                     )
                     output_notebook_direct = output_notebook
 

diff --git a/responders/Jupyter_Responder/Jupyter_Run_Notebook_Responder.json b/responders/Jupyter_Responder/Jupyter_Run_Notebook_Responder.json
@@ -77,7 +77,7 @@
     },
     {
       "name": "output_folder",
-      "description": "[OUTPUT] Folder path in which executed notebooks will be stored",
+      "description": "[OUTPUT] Folder path in which executed notebooks will be stored. This field supports datetime format codes (see the 'strftime' function).",
       "type": "string",
       "multi": false,
       "required": true,

diff --git a/responders/Jupyter_Responder/README.md b/responders/Jupyter_Responder/README.md
@@ -131,10 +131,10 @@ Here is the description for each parameter:
  - `input_handler_http_is_jupyterhub`: [INPUT][HTTP Handler] If you want to use the REST API to get the input notebook, you must indicate if you're behind a JupyterHub instance or not, otherwise don't take this parameter into account (Default: true)
  - `input_handler_http_execute_remotely`: [INPUT][HTTP Handler] If you want to use the REST API to get the input notebook, you must indicate if you want to run your code locally (papermill) or remotely (websocket through HTTP), otherwise don't take this parameter into account
  - `input_paths`: [INPUT] List of paths of the notebooks you want to run
- output_hostname: [OUTPUT] Hostname representing the Jupyter(Hub) instance (or Azure, S3 etc location) to reach to store the output notebook. See https://github.com/nteract/papermill#supported-name-handlers for more information. **Input paths must start with a "/"**.
+ - `output_hostname`: [OUTPUT] Hostname representing the Jupyter(Hub) instance (or Azure, S3 etc location) to reach to store the output notebook. See https://github.com/nteract/papermill#supported-name-handlers for more information.
  - `output_handler_http_service_api_token`: [HTTP Handler] If you want to use the REST API to store the output notebook, you must indicate an API token used by a dedicated service, otherwise don't take this parameter into account
  - `output_handler_http_is_jupyterhub`: [OUTPUT][HTTP Handler] If you want to use the REST API to store the output notebook, you must indicate if you're behind a JupyterHub instance or not, otherwise don't take this parameter into account (Default: true)
- - `output_folder`: [OUTPUT] Folder path in which executed notebooks will be stored. **Output folder must start and end with a "/"**. (Default: /)
+ - `output_folder`: [OUTPUT] Folder path in which executed notebooks will be stored. This field supports datetime format codes such as those used by the `strftime()` function.
  - `any_handler_http_user`: [ANY][HTTP Handler] If you want to use the REST API directly (HTTP handler), you must indicate which user will be used as the reference for having the original notebooks, otherwise don't take this parameter into account.
 
 Here is an example of what it could look like:
@@ -197,4 +197,8 @@ root#> su cortex
 cortex#> ipython kernel install --name "python3" --user
  ```
 
+## I have some trouble with the library Papermill and more precisely on the file `papermill/iorw.py`
+
+If you're using an input or output hostname starting with "http(s)", please check that you applied the patch mentioned above as expected. Otherwise, please raise an issue.
+
 You can reach the developer directly by email: [email protected]
diff --git a/...nders/Jupyter_Responder/assets/screenshot_cortex_responder_settings_example.png b/...nders/Jupyter_Responder/assets/screenshot_cortex_responder_settings_example.png
diff --git a/responders/Jupyter_Responder/jupyter.py b/responders/Jupyter_Responder/jupyter.py
@@ -23,6 +23,16 @@ def __init__(self):
         """Initialize the Jupyter analyzer"""
         Responder.__init__(self)
 
+        # Initialize the output folder
+        self.output_folder = self.get_param(
+            "config.output_folder",
+            None,
+            "You must provide an output folder path in which executed notebooks will be stored",
+        )
+        today = datetime.date.today()
+        # Parse the output folder to catch datetime masks
+        self.output_folder = datetime.datetime.strftime(today,self.output_folder)
+
         # Initialize configuration objects for notebooks
         self.input_configuration = self.initialize_path(
             hostname=self.get_param(
@@ -65,11 +75,6 @@ def __init__(self):
                 "[HTTP Handler only] JupyterHub for output notebook boolean parameter is missing.",
             ),
         )
-        self.output_folder = self.get_param(
-            "config.output_folder",
-            None,
-            "You must provide an output folder path in which executed notebooks will be stored",
-        )
 
         # Use input data as ID depending on the type
         if self.data_type == "thehive:alert":
@@ -170,6 +175,9 @@ def initialize_path(
                     "server_uri_http_api_kernels"
                 ].replace("http://", "ws://")
 
+            # Initialize the output path
+            self.create_output_path(hostname=result["server_uri_http_api_contents"],headers=result["handler_http_headers"])
+
         return result
 
     def get_conf(self, type, name):
@@ -317,6 +325,30 @@ def stop_server(self, hub_url, user, server_name=""):
             # wait to poll again
             time.sleep(1)
 
+    def create_output_path(self, hostname, headers=None):
+        """This function is used to create the output path subfolders if they aren't existing yet
+
+        Args:
+            hostname (str): URL of the hostname
+            headers (dict, optional): Headers for the request. Defaults to None.
+        """
+        subpaths = self.output_folder.split("/")
+        new_path = ""
+        for sp in subpaths:
+            new_path += "/{0}".format(sp)
+            # Build the path
+            final_path = "{0}{1}".format(
+                hostname,
+                new_path
+            )
+
+            # Check if folder is existing
+            status_code = requests.get(final_path, headers=headers).status_code
+            # If the folder exists, it will return a status code 200. Otherwise, we will need to create
+            if status_code != 200:
+                # Create the folder
+                requests.put(final_path, json={"name": sp, "type": "directory"}, headers=headers)
+
     @gen.coroutine
     def execute_notebook_remotely(self):
         """This function is used to execute a notebook thanks to a remote Jupyter instance using the REST API and a dedicated websocket
@@ -450,29 +482,26 @@ def execute_notebook_remotely(self):
             # Write the notebook after execution
             # Build output path notebook
             if self.get_conf("output", "handler_type") is HttpHandler:
-                output_path = "{0}{1}{2}-{3}-{4}?token={5}".format(
+                output_path = "{0}/{1}/{2}-{3}?token={4}".format(
                     self.get_conf("output", "server_uri_http_api_contents"),
                     self.output_folder,
-                    str(datetime.date.today()),
                     self.id,
-                    path[1:],
+                    path,
                     self.get_conf("output", "handler_http_service_api_token"),
                 )
-                output_path_direct = "{0}/user/{1}/tree{2}{3}-{4}-{5}".format(
+                output_path_direct = "{0}/user/{1}/tree/{2}/{3}-{4}".format(
                     self.get_conf("output", "hostname"),
                     self.get_conf("output", "handler_http_user"),
                     self.output_folder,
-                    str(datetime.date.today()),
                     self.id,
-                    path[1:],
+                    path,
                 )
             else:
-                output_path = "{0}{1}{2}-{3}-".format(
+                output_path = "{0}/{1}/{2}".format(
                     self.get_conf("output", "hostname"),
                     self.output_folder,
-                    str(datetime.date.today()),
                     self.id,
-                    path[1:],
+                    path,
                 )
                 output_path_direct = output_path
             pm.iorw.write_ipynb(nb_output, output_path)
@@ -527,29 +556,26 @@ def run(self):
 
                 # Build output path notebook
                 if self.get_conf("output", "handler_type") is HttpHandler:
-                    output_notebook = "{0}{1}{2}-{3}-{4}?token={5}".format(
+                    output_notebook = "{0}/{1}/{2}-{3}?token={4}".format(
                         self.get_conf("output", "server_uri_http_api_contents"),
                         self.output_folder,
-                        str(datetime.date.today()),
                         self.id,
-                        path[1:],
+                        path,
                         self.get_conf("output", "handler_http_service_api_token"),
                     )
-                    output_notebook_direct = "{0}/user/{1}/tree{2}{3}-{4}-{5}".format(
+                    output_notebook_direct = "{0}/user/{1}/tree/{2}/{3}-{4}".format(
                         self.get_conf("output", "hostname"),
                         self.get_conf("output", "handler_http_user"),
                         self.output_folder,
-                        str(datetime.date.today()),
                         self.id,
-                        path[1:],
+                        path,
                     )
                 else:
-                    output_notebook = "{0}{1}{2}-{3}-".format(
+                    output_notebook = "{0}/{1}/{2}-".format(
                         self.get_conf("output", "hostname"),
                         self.output_folder,
-                        str(datetime.date.today()),
                         self.id,
-                        path[1:],
+                        path,
                     )
                     output_notebook_direct = output_notebook