From be2bcb3519aa0771728836c30952f366ac0ccf6a Mon Sep 17 00:00:00 2001 From: nusantara-self <15647296+nusantara-self@users.noreply.github.com> Date: Thu, 27 Feb 2025 14:37:03 +0800 Subject: [PATCH 1/4] Yara3 -- Initial commit --- analyzers/Yara/Yara.json | 22 ++- analyzers/Yara/yara_analyzer.py | 314 +++++++++++++++++++++++++++++--- 2 files changed, 303 insertions(+), 33 deletions(-) diff --git a/analyzers/Yara/Yara.json b/analyzers/Yara/Yara.json index d355d682e..323998152 100644 --- a/analyzers/Yara/Yara.json +++ b/analyzers/Yara/Yara.json @@ -1,10 +1,10 @@ { "name": "Yara", - "author": "Nils Kuhnert, CERT-Bund", + "author": "Nils Kuhnert, CERT-Bund; Fabien Bloume, StrangeBee", "license": "AGPL-V3", "url": "https://github.com/BSI-CERT-Bund/cortex-analyzers", - "version": "2.0", - "description": "Check files against YARA rules.", + "version": "3.0", + "description": "Check files against YARA rules, either from local filesystem or from one or multiple GitHub repositories. NOTE: Performance & execution time may be much longer according to the number of rules checked.", "dataTypeList": ["file"], "command": "Yara/yara_analyzer.py", "baseConfig": "Yara", @@ -14,7 +14,21 @@ "description": "Define the path rules folder", "type": "string", "multi": true, - "required": true + "required": false + }, + { + "name": "github_urls", + "description": "GitHub URLs to get rules from. Expected format: https://github.com/owner/repo/tree/main or https://github.com/owner/repo/tree/main/subdir", + "type": "string", + "multi": true, + "required": false + }, + { + "name": "github_token", + "description": "PAT (recommended) in case of private repository or high frequency of pulls/executions", + "type": "string", + "multi": false, + "required": false } ] } diff --git a/analyzers/Yara/yara_analyzer.py b/analyzers/Yara/yara_analyzer.py index c3edb7db8..e50149b1f 100755 --- a/analyzers/Yara/yara_analyzer.py +++ b/analyzers/Yara/yara_analyzer.py @@ -4,68 +4,324 @@ import os import yara +import requests +import base64 +import binascii +import urllib.parse +import re +from html import unescape +from codecs import decode + +import tempfile + +# Decoding functions, when such payloads are found (not shown in Analyzer template) +def is_base64(s): + """Try Base64 decoding; return the decoded string or None if it fails""" + try: + decoded = base64.b64decode(s, validate=True) + return decoded.decode('utf-8', errors='ignore') # Convert bytes to string + except Exception: + return None # Not a valid Base64 string + +def is_hex(s): + """Detect and decode hex-encoded strings""" + try: + decoded = binascii.unhexlify(s).decode('utf-8', errors='ignore') + return decoded + except Exception: + return None # Not a valid hex string + +def is_rot13(s): + """Detect ROT13 encoding""" + decoded = decode(s, 'rot_13') + return decoded if decoded != s else None # If same, it wasn't ROT13 + +def is_url_encoded(s): + """Detect and decode URL-encoded payloads""" + decoded = urllib.parse.unquote(s) + return decoded if decoded != s else None # If same, it wasn't encoded + +def is_unicode_escape(s): + """Detect and decode Unicode escape sequences""" + try: + decoded = s.encode().decode('unicode_escape') + return decoded if decoded != s else None + except Exception: + return None + +def is_html_entity(s): + """Detect and decode HTML entity encoding""" + decoded = unescape(s) + return decoded if decoded != s else None # If same, it wasn't encoded + +def is_xor_static_key(s, key=0x12): + 
"""Attempt XOR decryption with a static key (useful for malware payloads)""" + try: + decoded = ''.join(chr(ord(c) ^ key) for c in s) + return decoded if decoded.isprintable() else None # Only return readable text + except Exception: + return None + +def extract_rule_names_from_file(filepath): + """Get all YARA rule names from a file.""" + try: + with open(filepath, "r") as f: + contents = f.read() + # This regex looks for lines that start (possibly with whitespace) + # followed by "rule", then a space and the rule identifier (letters, numbers, or underscores) + rule_names = re.findall(r'^\s*rule\s+([a-zA-Z0-9_]+)', contents, re.MULTILINE) + return rule_names + except Exception as e: + # If there is an error reading the file, return an empty list. + return [] + +def extract_github_info(url): + """ + Extract the repository identifier, branch, and subdirectory (if any) from a GitHub URL. + Expected URL formats: + - https://github.com/owner/repo/tree/main + - https://github.com/owner/repo/tree/main/subdir + Returns a dictionary with keys: 'repo' (owner/repo), 'branch', 'path' + """ + pattern = r'github\.com/([^/]+)/([^/]+)(?:/tree/([^/]+)(?:/(.*))?)?' + match = re.search(pattern, url) + if match: + owner = match.group(1) + repo = match.group(2) + branch = match.group(3) if match.group(3) else 'main' + subdir = match.group(4) if match.group(4) else "" + return {"repo": f"{owner}/{repo}", "branch": branch, "path": subdir} + return None class YaraAnalyzer(Analyzer): - """Checking binaries through yara rules. This analyzer requires a list of yara rule paths in the cortex - configuration. If a path is given, an index file is expected.""" + + def download_rules_from_github_url(self, url, token): + """ + Download .yar rule files from a GitHub URL formatted like /tree/main[/optional/subdir] + using the GitHub API and a PAT. Throws an error via self.error if the repository cannot + be accessed or if a file download fails. + + :param url: GitHub URL e.g. 
'https://github.com/owner/repo/tree/main/subdir' + :param token: Personal Access Token with repo scope + :return: List of paths to the downloaded rule files + """ + info = extract_github_info(url) + if not info: + self.error(f"Could not parse the GitHub URL: {url}") + + repo_identifier = info["repo"] + branch = info["branch"] + path = info["path"] # May be empty if no subdirectory is specified + + downloaded_rule_files = [] + headers = {"Authorization": f"token {token}"} + + # Build the API URL: if a subdirectory is provided, include it in the URL + api_url = f"https://api.github.com/repos/{repo_identifier}/contents" + if path: + api_url += f"/{path}" + api_url += f"?ref={branch}" + + response = requests.get(api_url, headers=headers) + if response.status_code != 200: + self.error(f"Error accessing repository contents: {response.status_code} - {response.text}") + + contents = response.json() + # Normalize to a list if a single file is returned + if isinstance(contents, dict): + contents = [contents] + for item in contents: + if item["name"].endswith(".yar"): + download_url = item.get("download_url") + if download_url: + file_response = requests.get(download_url, headers=headers) + if file_response.status_code != 200: + self.error(f"Error downloading file {item['name']}: {file_response.status_code}") + tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".yar") + tmp_file.write(file_response.content) + tmp_file.close() + downloaded_rule_files.append(tmp_file.name) + + return downloaded_rule_files + + def __init__(self): Analyzer.__init__(self) self.rulepaths = self.get_param('config.rules', None, 'No paths for rules provided.') - if isinstance(self.rulepaths, str): + if not self.rulepaths: + self.rulepaths = [] # Ensure it's a list even if nothing was provided + elif isinstance(self.rulepaths, str): self.rulepaths = [self.rulepaths] + + self.github_urls = self.get_param('config.github_urls', None, 'No GitHub URLs provided.') + self.github_token = self.get_param('config.github_token', None, 'No GitHub PAT provided.') self.ruleset = [] + self.ignored_rules = [] + for rulepath in self.rulepaths: - if os.path.isfile(rulepath): - if rulepath[len(rulepath)-3:] == 'yar': - self.ruleset.append(yara.compile(rulepath)) - elif rulepath[len(rulepath)-3:] == 'yas': - self.ruleset.append(yara.load(rulepath)) + if os.path.isfile(rulepath) and rulepath.endswith('.yar'): + try: + compiled_ruleset = yara.compile(filepath=rulepath) + rule_names = extract_rule_names_from_file(rulepath) + self.ruleset.append({ + "compiled": compiled_ruleset, + "rule_names": rule_names, + "source": rulepath + }) + except (yara.SyntaxError, yara.Error, Exception) as e: + error_msg = f"Failed to load YARA rule file {rulepath} - {str(e)}" + print(f"Warning: {error_msg}") + self.ignored_rules.append({"source": rulepath, "error": str(e)}) + elif os.path.isdir(rulepath): - if os.path.isfile(rulepath + '/index.yas'): - self.ruleset.append(yara.load(rulepath + '/index.yas')) - elif os.path.isfile(rulepath + '/index.yar'): - self.ruleset.append(yara.compile(rulepath + '/index.yar')) + rule_files = [os.path.join(rulepath, f) for f in os.listdir(rulepath) if f.endswith('.yar')] - def check(self, file): + if rule_files: + for rule_file in rule_files: + try: + compiled_ruleset = yara.compile(filepath=rule_file) + rule_names = extract_rule_names_from_file(rule_file) + self.ruleset.append({ + "compiled": compiled_ruleset, + "rule_names": rule_names, + "source": rule_file + }) + except (yara.SyntaxError, yara.Error, 
Exception) as e:
+                            error_msg = f"Failed to load YARA rule file {rule_file} - {str(e)}"
+                            print(f"Warning: {error_msg}")
+                            self.ignored_rules.append({"source": rule_file, "error": str(e)})
+                else:
+                    print(f"Warning: No .yar files found in directory {rulepath}")
+
+        if self.github_urls and self.github_token:
+            for url in self.github_urls:
+                github_rule_files = self.download_rules_from_github_url(url, self.github_token)
+                for rule_file in github_rule_files:
+                    try:
+                        compiled_ruleset = yara.compile(filepath=rule_file)
+                        rule_names = extract_rule_names_from_file(rule_file)
+                        self.ruleset.append({
+                            "compiled": compiled_ruleset,
+                            "rule_names": rule_names,
+                            "source": rule_file
+                        })
+                    except (yara.SyntaxError, yara.Error, Exception) as e:
+                        error_msg = f"Failed to load YARA rule file {rule_file} - {str(e)}"
+                        print(f"Warning: {error_msg}")
+                        self.ignored_rules.append({"source": rule_file, "error": str(e)})
+
+        if not self.ruleset:
+            print("Warning: No valid YARA rules were loaded.")
+
+
+    def check(self, file_path):
         """
-        Checks a given file against all available yara rules
+        Checks a given file against all available YARA rules.
 
-        :param file: Path to file
-        :type file:str
-        :returns: Python dictionary containing the results
-        :rtype: list
+        :param file_path: Path to file
+        :return: List of matched rule details, including multiple decoding methods
         """
-        result = []
-        for rule in self.ruleset:
-            matches = rule.match(file)
+
+        results = []
+        for idx, rule_obj in enumerate(self.ruleset):
+            try:
+                # Run the match on the file
+                matches = rule_obj["compiled"].match(file_path)
+            except Exception as e:
+                self.error(f"Error matching file '{file_path}' with ruleset from {rule_obj['source']} (index {idx}): {str(e)}")
+                continue
             for match in matches:
-                result.append(str(match))
+                try:
+                    decoded_strings = []
+                    for s in match.strings:
+                        try:
+                            matched_text = s[2].decode(errors='ignore')
+                        except Exception as e:
+                            matched_text = f"<unable to decode matched bytes: {e}>"
+
+                        # Apply all relevant decoding methods
+                        decoded_b64 = is_base64(matched_text)
+                        decoded_hex = is_hex(matched_text)
+                        decoded_rot13 = is_rot13(matched_text)
+                        decoded_url = is_url_encoded(matched_text)
+                        decoded_unicode = is_unicode_escape(matched_text)
+                        decoded_html = is_html_entity(matched_text)
+                        decoded_xor = is_xor_static_key(matched_text)
+
+                        decoded_strings.append({
+                            "offset": s[0],
+                            "matched": matched_text,
+                            "base64_decoded": decoded_b64 if decoded_b64 else "N/A",
+                            "hex_decoded": decoded_hex if decoded_hex else "N/A",
+                            "rot13_decoded": decoded_rot13 if decoded_rot13 else "N/A",
+                            "url_decoded": decoded_url if decoded_url else "N/A",
+                            "unicode_decoded": decoded_unicode if decoded_unicode else "N/A",
+                            "html_decoded": decoded_html if decoded_html else "N/A",
+                            "xor_decoded": decoded_xor if decoded_xor else "N/A"
+                        })
+                    results.append({
+                        "rule": match.rule,
+                        "namespace": match.namespace if hasattr(match, "namespace") else "N/A",
+                        "strings": decoded_strings,
+                        "meta": match.meta
+                    })
+                except Exception as e:
+                    self.error(f"Error processing match from rule '{match.rule}' in file {rule_obj['source']}: {str(e)}")
+        return results
-        return result
 
     def summary(self, raw):
         taxonomies = []
         namespace = "Yara"
         predicate = "Match"
-        value = "{} rule(s)".format(len(raw["results"]))
-        if len(raw["results"]) == 0:
-            level = "safe"
+        if isinstance(raw, list):
+            match_count = len(raw)
+        elif isinstance(raw, dict) and "results" in raw:
+            match_count = len(raw["results"])
         else:
-            level = "malicious"
+            match_count = 0
+
+        nb_of_rules = sum(len(rule_obj.get("rule_names", [])) for rule_obj in self.ruleset)
+
+        value = f"{match_count}/{nb_of_rules} rule(s)"
+        level = "safe" if match_count == 0 else "malicious"
         taxonomies.append(self.build_taxonomy(level, namespace, predicate, value))
         return {"taxonomies": taxonomies}
 
     def run(self):
         if self.data_type == 'file':
-            self.report({'results': self.check(self.get_param('file'))})
+            matches = self.check(self.get_param('file'))
+            summary = self.summary(matches)
+            rule_names = []
+            for rule_obj in self.ruleset:
+                rule_names.extend(rule_obj.get("rule_names", []))
+
+            output = {
+                "results": matches,
+                "summary": summary,
+                "rules_tested": sum(len(rule_obj.get("rule_names", [])) for rule_obj in self.ruleset),
+                "rulenames": rule_names,
+                "total_yar_files": len(self.ruleset),
+                "ignored_rules": self.ignored_rules
+            }
+            self.report(output)
         else:
             self.error('Wrong data type.')
 
 
 if __name__ == '__main__':
-    YaraAnalyzer().run()
+    YaraAnalyzer().run()
\ No newline at end of file

From 024e84a112964908c1f3f7b5a273a04564176b49 Mon Sep 17 00:00:00 2001
From: nusantara-self <15647296+nusantara-self@users.noreply.github.com>
Date: Thu, 27 Feb 2025 14:51:46 +0800
Subject: [PATCH 2/4] Updated analyzer template

---
 thehive-templates/Yara_2_0/long.html | 32 -------
 thehive-templates/Yara_3_0/long.html | 84 +++++++++++++++
 .../{Yara_2_0 => Yara_3_0}/short.html |  0
 3 files changed, 84 insertions(+), 32 deletions(-)
 delete mode 100644 thehive-templates/Yara_2_0/long.html
 create mode 100644 thehive-templates/Yara_3_0/long.html
 rename thehive-templates/{Yara_2_0 => Yara_3_0}/short.html (100%)

diff --git a/thehive-templates/Yara_2_0/long.html b/thehive-templates/Yara_2_0/long.html
deleted file mode 100644
index 8887feb10..000000000
--- a/thehive-templates/Yara_2_0/long.html
+++ /dev/null
@@ -1,32 +0,0 @@
-<div class="panel panel-danger" ng-if="success && content.results.length > 0">
-    <div class="panel-heading">
-        Yara Report
-    </div>
-    <div class="panel-body">
-        <dl class="dl-horizontal">
-            <dt>Matches</dt>
-            <dd ng-repeat="m in content.results track by $index">{{m}}</dd>
-        </dl>
-    </div>
-</div>
-<div class="panel panel-info" ng-if="success && content.results.length === 0">
-    <div class="panel-heading">
-        Yara Report
-    </div>
-    <div class="panel-body">
-        No matches.
-    </div>
-</div>
-
-<!-- General error -->
-<div class="panel panel-danger" ng-if="!success">
-    <div class="panel-heading">
-        <strong>{{(artifact.data || artifact.attachment.name) | fang}}</strong>
-    </div>
-    <div class="panel-body">
-        <dl class="dl-horizontal" ng-if="content.errorMessage">
-            <dt>Yara:</dt>
-            <dd>{{content.errorMessage}}</dd>
-        </dl>
-    </div>
-</div>
diff --git a/thehive-templates/Yara_3_0/long.html b/thehive-templates/Yara_3_0/long.html new file mode 100644 index 000000000..d45550b1e --- /dev/null +++ b/thehive-templates/Yara_3_0/long.html @@ -0,0 +1,84 @@ + +
+<!-- Summary -->
+<div class="panel panel-info" ng-if="success">
+    <div class="panel-heading">
+        <strong>Yara Report</strong>
+    </div>
+    <div class="panel-body">
+        Summary:
+        <span class="label" ng-repeat="t in content.summary.taxonomies"
+              ng-class="{'label-success': t.level === 'safe', 'label-danger': t.level === 'malicious'}">
+            {{t.namespace}}:{{t.predicate}}={{t.value}}
+        </span>
+    </div>
+</div>
+
+<!-- No matches -->
+<div class="panel panel-success" ng-if="success && content.results.length === 0">
+    <div class="panel-body">
+        No Yara rule matches were found.
+    </div>
+</div>
+
+<!-- Matches -->
+<div class="panel panel-danger" ng-if="success && content.results.length > 0">
+    <div class="panel-heading">
+        Detected Rules ({{content.results.length}})
+    </div>
+    <div class="panel-body">
+        <div class="panel panel-default" ng-repeat="rule in content.results">
+            <div class="panel-heading">
+                <strong>{{rule.rule}}</strong> ({{rule.namespace}})
+            </div>
+            <div class="panel-body">
+                <dl class="dl-horizontal">
+                    <dt ng-repeat-start="(key, value) in rule.meta">{{ key | uppercase }}:</dt>
+                    <dd ng-repeat-end>
+                        <a ng-if="key === 'reference'" href="{{value}}" target="_blank">{{ value }}</a>
+                        <span ng-if="key !== 'reference'">{{ value }}</span>
+                    </dd>
+                </dl>
+
+                <h5>Matched Strings</h5>
+                <table class="table table-condensed">
+                    <thead>
+                        <tr>
+                            <th>Offset</th>
+                            <th>Matched</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        <tr ng-repeat="str in rule.strings">
+                            <td>{{str.offset}}</td>
+                            <td>{{str.matched}}</td>
+                        </tr>
+                    </tbody>
+                </table>
+            </div>
+        </div>
+    </div>
+</div>
+
+<!-- Rules tested -->
+<div class="panel panel-default" ng-if="success">
+    <div class="panel-body">
+        Total YAR files: {{content.total_yar_files}}<br/>
+        Total rules tested: {{content.rules_tested}}
+        <details>
+            <summary>Rules scanned ({{content.rulenames.length}}): click to expand</summary>
+            {{content.rulenames.join(', ')}}
+        </details>
+    </div>
+</div>
+
+<!-- General error -->
+<div class="panel panel-danger" ng-if="!success">
+    <div class="panel-heading">
+        <strong>{{(artifact.data || artifact.attachment.name) | fang}}</strong>
+    </div>
+    <div class="panel-body">
+        <dl class="dl-horizontal" ng-if="content.errorMessage">
+            <dt>Yara:</dt>
+            <dd>{{content.errorMessage}}</dd>
+        </dl>
+    </div>
+</div>
\ No newline at end of file diff --git a/thehive-templates/Yara_2_0/short.html b/thehive-templates/Yara_3_0/short.html similarity index 100% rename from thehive-templates/Yara_2_0/short.html rename to thehive-templates/Yara_3_0/short.html From 9545ca9633f08b89f137ba483ddab2b3c3a7643a Mon Sep 17 00:00:00 2001 From: nusantara-self <15647296+nusantara-self@users.noreply.github.com> Date: Thu, 27 Feb 2025 14:54:17 +0800 Subject: [PATCH 3/4] Update requirements.txt --- analyzers/Yara/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/analyzers/Yara/requirements.txt b/analyzers/Yara/requirements.txt index f2bc20aee..21410639a 100644 --- a/analyzers/Yara/requirements.txt +++ b/analyzers/Yara/requirements.txt @@ -1,2 +1,3 @@ -yara-python cortexutils +yara-python +requests \ No newline at end of file From 546141017fd60d1e36b8ab2ad339b43562340aa6 Mon Sep 17 00:00:00 2001 From: nusantara-self <15647296+nusantara-self@users.noreply.github.com> Date: Thu, 27 Feb 2025 14:54:54 +0800 Subject: [PATCH 4/4] Add Alpine Dockerfile --- analyzers/Yara/Dockerfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 analyzers/Yara/Dockerfile diff --git a/analyzers/Yara/Dockerfile b/analyzers/Yara/Dockerfile new file mode 100644 index 000000000..cbb7323d8 --- /dev/null +++ b/analyzers/Yara/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3-alpine +WORKDIR /worker + +# Install build dependencies for compiling native extensions +RUN apk add --no-cache \ + gcc \ + musl-dev \ + libffi-dev + +COPY requirements.txt Yara/ +RUN test ! -e Yara/requirements.txt || pip install --no-cache-dir -r Yara/requirements.txt +COPY . Yara/ + +ENTRYPOINT ["python", "Yara/yara_analyzer.py"] \ No newline at end of file
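
Reviewer note: the snippet below is a minimal sketch, not part of the patch, showing how the pure helper functions introduced in PATCH 1/4 behave. It assumes yara_analyzer.py is importable from analyzers/Yara/ (run from that directory) and that the packages listed in requirements.txt are installed; the example URLs and strings are illustrative only.

# Minimal sketch: exercise the helpers added in PATCH 1/4.
# Assumes cwd is analyzers/Yara/ and cortexutils, yara-python and requests are installed.
from yara_analyzer import extract_github_info, is_base64, is_rot13

# URL parsing used by the new github_urls config option
print(extract_github_info("https://github.com/owner/repo/tree/main"))
# -> {'repo': 'owner/repo', 'branch': 'main', 'path': ''}
print(extract_github_info("https://github.com/owner/repo/tree/main/yara/crimeware"))
# -> {'repo': 'owner/repo', 'branch': 'main', 'path': 'yara/crimeware'}

# Decoding helpers applied to each matched string in check()
print(is_base64("aGVsbG8="))   # -> 'hello'
print(is_rot13("uryyb"))       # -> 'hello'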