Skip to content

Commit

Permalink
Merge branch 'feature/jupyter-analyzer' of https://github.com/LetMeR0…
Browse files Browse the repository at this point in the history
…0t/Cortex-Analyzers into LetMeR00t-feature/jupyter-analyzer
  • Loading branch information
jeromeleonard committed Aug 16, 2023
2 parents 9d1b597 + 4f484f3 commit 787e62a
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
},
{
"name": "output_folder",
"description": "[OUTPUT] Folder path in which executed notebooks will be stored",
"description": "[OUTPUT] Folder path in which executed notebooks will be stored. This field is supporting datetime format (see 'strftime' function).",
"type": "string",
"multi": false,
"required": true,
Expand Down
8 changes: 6 additions & 2 deletions analyzers/Jupyter_Analyzer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,10 @@ Here is the description for each parameter:
- `input_handler_http_is_jupyterhub`: [INPUT][HTTP Handler] If you want to use the REST API to get the input notebook, you must indicate if you're behind a JupyterHub instance or not, otherwise don't take this parameter into account (Default: true)
- `input_handler_http_execute_remotely`: [INPUT][HTTP Handler] If you want to use the REST API to get the input notebook, you must indicate if you want to run your code locally (papermill) or remotely (websocket through HTTP), otherwise don't take this parameter into account
- `input_paths`: [INPUT] List of paths of the notebooks you want to run
output_hostname: [OUTPUT] Hostname representing the Jupyter(Hub) instance (or Azure, S3 etc location) to reach to store the output notebook. See https://github.com/nteract/papermill#supported-name-handlers for more information. **Input paths must start with a "/"**.
- `output_hostname`: [OUTPUT] Hostname representing the Jupyter(Hub) instance (or Azure, S3 etc location) to reach to store the output notebook. See https://github.com/nteract/papermill#supported-name-handlers for more information.
- `output_handler_http_service_api_token`: [HTTP Handler] If you want to use the REST API to store the output notebook, you must indicate an API token used by a dedicated service, otherwise don't take this parameter into account
- `output_handler_http_is_jupyterhub`: [OUTPUT][HTTP Handler] If you want to use the REST API to store the output notebook, you must indicate if you're behind a JupyterHub instance or not, otherwise don't take this parameter into account (Default: true)
- `output_folder`: [OUTPUT] Folder path in which executed notebooks will be stored. **Output folder must start and end with a "/"**. (Default: /)
- `output_folder`: [OUTPUT] Folder path in which executed notebooks will be stored. This field supports datetime format codes such as those used by the `strftime()` function.
- `any_handler_http_user`: [ANY][HTTP Handler] If you want to use the REST API directly (HTTP handler), you must indicate which user will be used as the reference for having the original notebooks, otherwise don't take this parameter into account.
- `any_generate_html`: [ANY] Indicates if you want only the HTML conversion as a response (not the full detailed payload) (Default: true)

Expand Down Expand Up @@ -210,4 +210,8 @@ root#> su cortex
cortex#> ipython kernel install --name "python3" --user
```

## I have some trouble with the Papermill library, more precisely with the file `papermill/iorw.py`

If you're using an input or output hostname starting with "http(s)", please check that you applied the patch mentioned above as expected. Otherwise, please raise an issue.

You can reach the developer directly by email: [email protected]
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
73 changes: 50 additions & 23 deletions analyzers/Jupyter_Analyzer/jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ def __init__(self):
"""Initialize the Jupyter analyzer"""
Analyzer.__init__(self)

# Initialize the output folder
self.output_folder = self.get_param(
"config.output_folder",
None,
"You must provide an output folder path in which executed notebooks will be stored",
)
today = datetime.date.today()
# Parse the output folder to catch datetime masks
self.output_folder = datetime.datetime.strftime(today,self.output_folder)

# Initialize configuration objects for notebooks
self.input_configuration = self.initialize_path(
hostname=self.get_param(
Expand Down Expand Up @@ -65,11 +75,6 @@ def __init__(self):
"[HTTP Handler only] JupyterHub for output notebook boolean parameter is missing.",
),
)
self.output_folder = self.get_param(
"config.output_folder",
None,
"You must provide an output folder path in which executed notebooks will be stored",
)

# Additional parameters
self.any_generate_html = self.get_param(
Expand Down Expand Up @@ -166,6 +171,9 @@ def initialize_path(
"server_uri_http_api_kernels"
].replace("http://", "ws://")

# Initialize the output path
self.create_output_path(hostname=result["server_uri_http_api_contents"],headers=result["handler_http_headers"])

return result

def get_conf(self, type, name):
Expand Down Expand Up @@ -313,6 +321,31 @@ def stop_server(self, hub_url, user, server_name=""):
# wait to poll again
time.sleep(1)

def create_output_path(self, hostname, headers=None):
    """Create the sub-folders of the output folder that do not exist yet.

    ``self.output_folder`` is walked one segment at a time. Each cumulative
    path is appended to ``hostname`` and probed with a GET request; when the
    answer is not a 200, a PUT request describing a directory is sent to
    that same URL.

    Args:
        hostname (str): Base URL the folder path is appended to.
        headers (dict, optional): Headers for the requests. Defaults to None.
    """
    # Drop empty segments (leading, trailing or doubled "/", or an empty
    # folder) so that no request targets a "//" URL and no directory with an
    # empty name is ever submitted for creation.
    segments = [sp for sp in self.output_folder.split("/") if sp]

    new_path = ""
    for sp in segments:
        # Extend the cumulative path with the current sub-folder
        new_path += "/{0}".format(sp)
        final_path = "{0}{1}".format(hostname, new_path)

        # A 200 means the folder already exists; any other status triggers
        # the creation request.
        status_code = requests.get(final_path, headers=headers).status_code
        if status_code != 200:
            requests.put(
                final_path,
                json={"name": sp, "type": "directory"},
                headers=headers,
            )


@gen.coroutine
def execute_notebook_remotely(self):
"""This function is used to execute a notebook thanks to a remote Jupyter instance using the REST API and a dedicated websocket
Expand Down Expand Up @@ -446,29 +479,26 @@ def execute_notebook_remotely(self):
# Write the notebook after execution
# Build output path notebook
if self.get_conf("output", "handler_type") is HttpHandler:
output_path = "{0}{1}{2}-{3}-{4}?token={5}".format(
output_path = "{0}/{1}/{2}-{3}?token={4}".format(
self.get_conf("output", "server_uri_http_api_contents"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
self.get_conf("output", "handler_http_service_api_token"),
)
output_path_direct = "{0}/user/{1}/tree{2}{3}-{4}-{5}".format(
output_path_direct = "{0}/user/{1}/tree/{2}/{3}-{4}".format(
self.get_conf("output", "hostname"),
self.get_conf("output", "handler_http_user"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
)
else:
output_path = "{0}{1}{2}-{3}-".format(
output_path = "{0}/{1}/{2}".format(
self.get_conf("output", "hostname"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
)
output_path_direct = output_path
pm.iorw.write_ipynb(nb_output, output_path)
Expand Down Expand Up @@ -523,29 +553,26 @@ def run(self):

# Build output path notebook
if self.get_conf("output", "handler_type") is HttpHandler:
output_notebook = "{0}{1}{2}-{3}-{4}?token={5}".format(
output_notebook = "{0}/{1}/{2}-{3}?token={4}".format(
self.get_conf("output", "server_uri_http_api_contents"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
self.get_conf("output", "handler_http_service_api_token"),
)
output_notebook_direct = "{0}/user/{1}/tree{2}{3}-{4}-{5}".format(
output_notebook_direct = "{0}/user/{1}/tree/{2}/{3}-{4}".format(
self.get_conf("output", "hostname"),
self.get_conf("output", "handler_http_user"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
)
else:
output_notebook = "{0}{1}{2}-{3}-".format(
output_notebook = "{0}/{1}/{2}-".format(
self.get_conf("output", "hostname"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
)
output_notebook_direct = output_notebook

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
},
{
"name": "output_folder",
"description": "[OUTPUT] Folder path in which executed notebooks will be stored",
"description": "[OUTPUT] Folder path in which executed notebooks will be stored. This field is supporting datetime format (see 'strftime' function).",
"type": "string",
"multi": false,
"required": true,
Expand Down
8 changes: 6 additions & 2 deletions responders/Jupyter_Responder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,10 @@ Here is the description for each parameter:
- `input_handler_http_is_jupyterhub`: [INPUT][HTTP Handler] If you want to use the REST API to get the input notebook, you must indicate if you're behind a JupyterHub instance or not, otherwise don't take this parameter into account (Default: true)
- `input_handler_http_execute_remotely`: [INPUT][HTTP Handler] If you want to use the REST API to get the input notebook, you must indicate if you want to run your code locally (papermill) or remotely (websocket through HTTP), otherwise don't take this parameter into account
- `input_paths`: [INPUT] List of paths of the notebooks you want to run
output_hostname: [OUTPUT] Hostname representing the Jupyter(Hub) instance (or Azure, S3 etc location) to reach to store the output notebook. See https://github.com/nteract/papermill#supported-name-handlers for more information. **Input paths must start with a "/"**.
- `output_hostname`: [OUTPUT] Hostname representing the Jupyter(Hub) instance (or Azure, S3 etc location) to reach to store the output notebook. See https://github.com/nteract/papermill#supported-name-handlers for more information.
- `output_handler_http_service_api_token`: [HTTP Handler] If you want to use the REST API to store the output notebook, you must indicate an API token used by a dedicated service, otherwise don't take this parameter into account
- `output_handler_http_is_jupyterhub`: [OUTPUT][HTTP Handler] If you want to use the REST API to store the output notebook, you must indicate if you're behind a JupyterHub instance or not, otherwise don't take this parameter into account (Default: true)
- `output_folder`: [OUTPUT] Folder path in which executed notebooks will be stored. **Output folder must start and end with a "/"**. (Default: /)
- `output_folder`: [OUTPUT] Folder path in which executed notebooks will be stored. This field supports datetime format codes such as those used by the `strftime()` function.
- `any_handler_http_user`: [ANY][HTTP Handler] If you want to use the REST API directly (HTTP handler), you must indicate which user will be used as the reference for having the original notebooks, otherwise don't take this parameter into account.

Here is an example of what it could look like:
Expand Down Expand Up @@ -197,4 +197,8 @@ root#> su cortex
cortex#> ipython kernel install --name "python3" --user
```

## I have some trouble with the Papermill library, more precisely with the file `papermill/iorw.py`

If you're using an input or output hostname starting with "http(s)", please check that you applied the patch mentioned above as expected. Otherwise, please raise an issue.

You can reach the developer directly by email: [email protected]
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
72 changes: 49 additions & 23 deletions responders/Jupyter_Responder/jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ def __init__(self):
"""Initialize the Jupyter analyzer"""
Responder.__init__(self)

# Initialize the output folder
self.output_folder = self.get_param(
"config.output_folder",
None,
"You must provide an output folder path in which executed notebooks will be stored",
)
today = datetime.date.today()
# Parse the output folder to catch datetime masks
self.output_folder = datetime.datetime.strftime(today,self.output_folder)

# Initialize configuration objects for notebooks
self.input_configuration = self.initialize_path(
hostname=self.get_param(
Expand Down Expand Up @@ -65,11 +75,6 @@ def __init__(self):
"[HTTP Handler only] JupyterHub for output notebook boolean parameter is missing.",
),
)
self.output_folder = self.get_param(
"config.output_folder",
None,
"You must provide an output folder path in which executed notebooks will be stored",
)

# Use input data as ID depending on the type
if self.data_type == "thehive:alert":
Expand Down Expand Up @@ -170,6 +175,9 @@ def initialize_path(
"server_uri_http_api_kernels"
].replace("http://", "ws://")

# Initialize the output path
self.create_output_path(hostname=result["server_uri_http_api_contents"],headers=result["handler_http_headers"])

return result

def get_conf(self, type, name):
Expand Down Expand Up @@ -317,6 +325,30 @@ def stop_server(self, hub_url, user, server_name=""):
# wait to poll again
time.sleep(1)

def create_output_path(self, hostname, headers=None):
    """Create the sub-folders of the output folder that do not exist yet.

    ``self.output_folder`` is walked one segment at a time. Each cumulative
    path is appended to ``hostname`` and probed with a GET request; when the
    answer is not a 200, a PUT request describing a directory is sent to
    that same URL.

    Args:
        hostname (str): Base URL the folder path is appended to.
        headers (dict, optional): Headers for the requests. Defaults to None.
    """
    # Drop empty segments (leading, trailing or doubled "/", or an empty
    # folder) so that no request targets a "//" URL and no directory with an
    # empty name is ever submitted for creation.
    segments = [sp for sp in self.output_folder.split("/") if sp]

    new_path = ""
    for sp in segments:
        # Extend the cumulative path with the current sub-folder
        new_path += "/{0}".format(sp)
        final_path = "{0}{1}".format(hostname, new_path)

        # A 200 means the folder already exists; any other status triggers
        # the creation request.
        status_code = requests.get(final_path, headers=headers).status_code
        if status_code != 200:
            requests.put(
                final_path,
                json={"name": sp, "type": "directory"},
                headers=headers,
            )

@gen.coroutine
def execute_notebook_remotely(self):
"""This function is used to execute a notebook thanks to a remote Jupyter instance using the REST API and a dedicated websocket
Expand Down Expand Up @@ -450,29 +482,26 @@ def execute_notebook_remotely(self):
# Write the notebook after execution
# Build output path notebook
if self.get_conf("output", "handler_type") is HttpHandler:
output_path = "{0}{1}{2}-{3}-{4}?token={5}".format(
output_path = "{0}/{1}/{2}-{3}?token={4}".format(
self.get_conf("output", "server_uri_http_api_contents"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
self.get_conf("output", "handler_http_service_api_token"),
)
output_path_direct = "{0}/user/{1}/tree{2}{3}-{4}-{5}".format(
output_path_direct = "{0}/user/{1}/tree/{2}/{3}-{4}".format(
self.get_conf("output", "hostname"),
self.get_conf("output", "handler_http_user"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
)
else:
output_path = "{0}{1}{2}-{3}-".format(
output_path = "{0}/{1}/{2}".format(
self.get_conf("output", "hostname"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
)
output_path_direct = output_path
pm.iorw.write_ipynb(nb_output, output_path)
Expand Down Expand Up @@ -527,29 +556,26 @@ def run(self):

# Build output path notebook
if self.get_conf("output", "handler_type") is HttpHandler:
output_notebook = "{0}{1}{2}-{3}-{4}?token={5}".format(
output_notebook = "{0}/{1}/{2}-{3}?token={4}".format(
self.get_conf("output", "server_uri_http_api_contents"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
self.get_conf("output", "handler_http_service_api_token"),
)
output_notebook_direct = "{0}/user/{1}/tree{2}{3}-{4}-{5}".format(
output_notebook_direct = "{0}/user/{1}/tree/{2}/{3}-{4}".format(
self.get_conf("output", "hostname"),
self.get_conf("output", "handler_http_user"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
)
else:
output_notebook = "{0}{1}{2}-{3}-".format(
output_notebook = "{0}/{1}/{2}-".format(
self.get_conf("output", "hostname"),
self.output_folder,
str(datetime.date.today()),
self.id,
path[1:],
path,
)
output_notebook_direct = output_notebook

Expand Down

0 comments on commit 787e62a

Please sign in to comment.