From 2cf0c6f8b85daf2aee8c86df0146fe95ba2c4410 Mon Sep 17 00:00:00 2001
From: jmorascalyr <42879226+jmorascalyr@users.noreply.github.com>
Date: Wed, 18 Feb 2026 16:19:08 -0700
Subject: [PATCH] feat: Add SIEM correlation scenario execution with context
 injection and parser improvements

- Added start_correlation_scenario() and _execute_correlation_scenario() methods to ScenarioService for executing scenarios with SIEM context passed via environment variable
- Updated Proofpoint event generator to handle overrides for phishScore-based threat type determination and added click-related fields (clickIP, clickTime, threatURL) for parser detection
- Rewrote Proofpoint parser from complex multi-format rewrite rules to a single dottedjson format with predicate-based Click/Email/Other mappings
---
 Backend/api/app/services/scenario_service.py  |  64 ++++
 .../email_security/proofpoint.py              |  34 +-
 .../proofpoint_proofpoint_logs.json           | 357 ++++--------------
 .../scenarios/apollo_ransomware_scenario.py   |  16 +-
 4 files changed, 188 insertions(+), 283 deletions(-)

diff --git a/Backend/api/app/services/scenario_service.py b/Backend/api/app/services/scenario_service.py
index e5420e9..9a40a2f 100644
--- a/Backend/api/app/services/scenario_service.py
+++ b/Backend/api/app/services/scenario_service.py
@@ -260,6 +260,70 @@ async def start_scenario(
         
         return execution_id
     
+    async def start_correlation_scenario(
+        self, scenario_id: str, siem_context: Dict[str, Any],
+        speed: str = "fast", dry_run: bool = False, background_tasks=None,
+    ) -> str:
+        """Register a correlation scenario run and return its execution id.
+
+        The run is only scheduled when ``background_tasks`` is supplied.
+        ``speed`` and ``dry_run`` are recorded but not forwarded to the run.
+        """
+        execution_id = str(uuid.uuid4())
+        self.running_scenarios[execution_id] = {
+            "scenario_id": scenario_id,
+            "execution_id": execution_id,
+            "status": "running",
+            "started_at": datetime.utcnow().isoformat(),
+            "speed": speed,
+            "dry_run": dry_run,
+            "siem_context": siem_context,
+            "progress": 0,
+        }
+        # Compare against None so a falsy task container is not taken for "absent".
+        if background_tasks is not None:
+            background_tasks.add_task(self._execute_correlation_scenario, execution_id, scenario_id, siem_context)
+        else:
+            logger.warning("Correlation scenario %s was registered without background_tasks and will not run", execution_id)
+        return execution_id
+    
+    async def _execute_correlation_scenario(self, execution_id: str, scenario_id: str, siem_context: Dict[str, Any]):
+        """Import the scenario module named ``scenario_id`` and run it.
+
+        ``siem_context`` is passed as a keyword argument and, while the scenario
+        runs, as JSON in the process-wide ``SIEM_CONTEXT`` environment variable.
+        The result or error is stored on the execution record; exceptions are
+        logged and recorded rather than raised.
+        """
+        import importlib
+        import os
+        import sys
+        from pathlib import Path
+
+        # NOTE(review): from Backend/api/app/services this resolves to
+        # Backend/api/scenarios, not Backend/scenarios - confirm the layout.
+        scenarios_dir = Path(__file__).parent.parent.parent / "scenarios"
+        if str(scenarios_dir) not in sys.path:
+            sys.path.insert(0, str(scenarios_dir))
+        # A detached dict absorbs the updates when the record does not exist.
+        record = self.running_scenarios.get(execution_id, {})
+        try:
+            # scenario_id is caller-supplied: refuse dotted or path-like names.
+            if not scenario_id.isidentifier():
+                raise ValueError(f"Invalid correlation scenario id: {scenario_id!r}")
+            os.environ["SIEM_CONTEXT"] = json.dumps(siem_context)
+            module = importlib.import_module(scenario_id)
+            # Prefer generate_<scenario_id>; fall back to the Apollo entry point.
+            entry_point = getattr(module, f"generate_{scenario_id}", None) or module.generate_apollo_ransomware_scenario
+            result = entry_point(siem_context=siem_context)
+            record.update(status="completed", progress=100, completed_at=datetime.utcnow().isoformat(), result=result)
+        except Exception as e:
+            logger.exception(f"Correlation scenario execution failed: {e}")
+            record.update(status="failed", error=str(e), completed_at=datetime.utcnow().isoformat())
+        finally:
+            # Always clear the variable so it cannot leak into a later run.
+            os.environ.pop("SIEM_CONTEXT", None)
+    
     async def _execute_scenario(self, execution_id: str, scenario: Dict[str, Any]):
         """Execute scenario in background"""
         try:
diff --git a/Backend/event_generators/email_security/proofpoint.py b/Backend/event_generators/email_security/proofpoint.py
index 3802d04..3362310 100644
--- a/Backend/event_generators/email_security/proofpoint.py
+++ b/Backend/event_generators/email_security/proofpoint.py
@@ -192,8 +192,18 @@ def proofpoint_log(overrides: dict | None = None) -> Dict:
     Pass `overrides` to force any field to a specific value:
         proofpoint_log({"phishScore": 95})
     """
-    # Determine threat type
-    threat_type = random.choice(THREAT_TYPES)
+    # Only an explicit phishScore override steers the threat type; without
+    # one (no overrides, or overrides for other fields) it stays random.
+    phish_score = (overrides or {}).get("phishScore")
+    if phish_score is not None and phish_score > 50:
+        # High score: force a malicious classification
+        threat_type = random.choice(["phish", "malware", "spam"])
+    elif phish_score == 0:
+        # Zero score: force a clean message
+        threat_type = "none"
+    else:
+        threat_type = random.choice(THREAT_TYPES)
+    
     is_malicious = threat_type != "none"
     
     # Generate sender and recipient
@@ -275,6 +285,26 @@ def proofpoint_log(overrides: dict | None = None) -> Dict:
     # Add message parts
     event["messageParts"] = _generate_message_parts(threat_type)
     
+    # Add click-related fields for parser detection
+    if is_malicious:
+        event["clickIP"] = _generate_ip()
+        event["clickTime"] = (message_time + timedelta(minutes=random.randint(1, 30))).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
+        event["threatURL"] = f"https://threatinsight.proofpoint.com/#/threat_id/{uuid.uuid4()}"
+        event["userAgent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+        event["event.type"] = "Click"
+    else:
+        event["event.type"] = "Email"
+    
+    # Add unmapped fields and other required fields
+    event["unmapped.classification"] = threat_type
+    event["unmapped.recipient"] = recipient_email
+    event["unmapped.sender"] = sender_email
+    event["url.url_string"] = f"https://threatinsight.proofpoint.com/#/threat_id/{uuid.uuid4()}"
+    event["device.ip"] = _generate_ip()
+    
+    # Add timestamp field directly
+    event["timestamp"] = message_time.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
+    
     # Add SPF, DKIM, DMARC results
     event["spf"] = random.choice(["pass", "fail", "softfail", "neutral", "none"])
     event["dkimv"] = random.choice(["pass", "fail", "none"])
diff --git a/Backend/parsers/community/proofpoint_proofpoint_logs-latest/proofpoint_proofpoint_logs.json b/Backend/parsers/community/proofpoint_proofpoint_logs-latest/proofpoint_proofpoint_logs.json
index 4a708f0..614244b 100644
--- a/Backend/parsers/community/proofpoint_proofpoint_logs-latest/proofpoint_proofpoint_logs.json
+++ b/Backend/parsers/community/proofpoint_proofpoint_logs-latest/proofpoint_proofpoint_logs.json
@@ -1,288 +1,85 @@
 {
-    attributes: {
-      "dataSource.category": "security",
-      "dataSource.name": "Proofpoint",
-      "dataSource.vendor": "Proofpoint"
-    },
-    formats: [
+  attributes: {
+    "dataSource.vendor": "Proofpoint",
+    "dataSource.category": "security"
+  },
+  formats: [
+    {
+      id: "format1",
+      format: "${parse=dottedjson}$"
+      rewrites: [
+        {input: "messageTime", output: "timestamp", match: ".*", replace: "$0"},
+        {input: "clickTime", output: "unmapped.clickTime", match: ".*", replace: "$0"},
+
+        {input: "threatsInfoMap", output: "unmapped.threatsInfoMap", match: ".*", replace: "$0"},
+        {input: "messageParts", output: "unmapped.messageParts", match: ".*", replace: "$0"},
+        {input: "quarantineFolder", output: "unmapped.quarantineFolder", match: ".*", replace: "$0"},
+        {input: "impostorScore", output: "unmapped.impostorScore", match: ".*", replace: "$0"},
+        {input: "phishScore", output: "unmapped.phishScore", match: ".*", replace: "$0"},
+        {input: "policyRoutes", output: "unmapped.policyRoutes", match: ".*", replace: "$0"},
+
+        {input: "threatStatus", output: "unmapped.threatStatus", match: ".*", replace: "$0"},
+        {input: "threatsInfoMap[0].threatStatus", output: "unmapped.threatStatus", match: ".*", replace: "$0"},
+
+        {input: "classification", output: "unmapped.classification", match: ".*", replace: "$0"},
+        {input: "threatsInfoMap[0].classification", output: "unmapped.classification", match: ".*", replace: "$0"},
+
+        {input: "sender", output: "unmapped.sender", match: ".*", replace: "$0"},
+        {input: "recipient", output: "unmapped.recipient", match: ".*", replace: "$0"},
+
+        {input: "sender", output: "email.from", match: ".*", replace: "$0"},
+        {input: "fromAddress[0]", output: "email.from", match: ".*", replace: "$0"},
+        {input: "fromAddress", output: "email.from", match: ".*", replace: "$0"},
+
+        {input: "recipient", output: "email.to", match: ".*", replace: "$0"},
+        {input: "toAddresses[0]", output: "email.to", match: ".*", replace: "$0"},
+        {input: "toAddresses", output: "email.to", match: ".*", replace: "$0"},
+
+        {input: "recipient", output: "actor.user.name", match: ".*", replace: "$0"},
+        {input: "toAddresses[0]", output: "actor.user.name", match: ".*", replace: "$0"},
+        {input: "toAddresses", output: "actor.user.name", match: ".*", replace: "$0"},
+
+        {input: "messageSize", output: "email.size", match: ".*", replace: "$0"},
+        {input: "headerFrom", output: "email.smtp_from", match: ".*", replace: "$0"},
+
+        {input: "oext", output: "oext", match: ".*", replace: "$0"},
+        {input: "mod", output: "mod", match: ".*", replace: "$0"}
+      ]
+    }
+  ],
+  mappings: {
+    version: 1,
+    mappings: [
       {
-        format: ".*${parse=json}{attrBlacklist=(xmailer|campaignId|threatsInfoMap|messageParts)}$"
-         rewrites: [
-           {
-             input:   "GUID",
-             output:  "message.guid",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "QID",
-             output:  "message.queueid",
-             match:   ".*",
-             replace: "$0"
-           },  
-           {
-             input:   "ccAddresses",
-             output:  "email.cc",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "cluster",
-             output:  "email_activity.cluster",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "completelyRewritten",
-             output:  "message.is_rewritten",
-             match:   ".*",
-             replace: "$0"
-           }, 
-           {
-             input:   "fromAddress",
-             output:  "email.from_address",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "headerFrom",
-             output:  "email.from",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "headerReplyTo",
-             output:  "email.reply_to",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "id",
-             output:  "email_activity.activity_id",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "impostorScore",
-             output:  "email_activity.impostor_score",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "malwareScore",
-             output:  "email_activity.malware_score",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "messageID",
-             output:  "message.id",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "messageSize",
-             output:  "message.size",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "messageTime",
-             output:  "email_activity.time_dt",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "modulesRun",
-             output:  "email_activity.modules_run",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "phishScore",
-             output:  "email_activity.severity",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "policyRoutes",
-             output:  "policy.route",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "quarantineFolder",
-             output:  "file.parent_folder",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "quarantineRule",
-             output:  "policy.rule",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "recipient",
-             output:  "email.to",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "replyToAddress",
-             output:  "email.reply_to_address",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "sender",
-             output:  "email.sender_email",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "senderIP",
-             output:  "email.sender_ip",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "spamScore",
-             output:  "device.risk_score",
-             match:   ".*",
-             replace: "$0"
-           }
-           {
-             input:   "subject",
-             output:  "email.subject",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "toAddresses",
-             output:  "email.delivered_to",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "clickIP",
-             output:  "device.ip",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "clickTime",
-             output:  "email_activity.start_time_dt",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatID",
-             output:  "threat.id",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatURL",
-             output:  "threat.url",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatStatus",
-             output:  "threat.status",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "userAgent",
-             output:  "http_request.user_agent",
-             match:   ".*",
-             replace: "$0"
-           }
-         ]
+        predicate: "clickTime = * OR clickIP = * OR threatURL = *",
+        transformations: [
+          { constant: { field: "dataSource.name", value: "Proofpoint" } },
+          { constant: { field: "event.type", value: "Click" } },
+          { copy: { from: "fromAddress[0]", to: "email.from" } },
+          { copy: { from: "toAddresses[0]", to: "email.to" } },
+          { copy: { from: "toAddresses[0]", to: "actor.user.name" } },
+          { copy: { from: "recipient", to: "actor.user.name" } }
+        ]
       },
       {
-        format: ".*messageParts\".\\s.$messageParts.{parse=json}{attrBlacklist=(disposition|oContentType)}$"
-        rewrites: [
-          {
-             input:   "messageParts.contentType", 
-             output:  "event.type",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "messageParts.contentType", 
-             output:  "message.type",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "messageParts.filename", 
-             output:  "file.name",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "messageParts.md5", 
-             output:  "file.md5",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "messageParts.sha256", 
-             output:  "file.sha256",
-             match:   ".*",
-             replace: "$0"
-           },
-         ]
+        predicate: "messageTime = * OR messageParts = * OR threatsInfoMap = *",
+        transformations: [
+          { constant: { field: "dataSource.name", value: "Proofpoint" } },
+          { constant: { field: "event.type", value: "Email" } },
+          { copy: { from: "fromAddress[0]", to: "email.from" } },
+          { copy: { from: "toAddresses[0]", to: "email.to" } },
+          { copy: { from: "toAddresses[0]", to: "actor.user.name" } },
+          { copy: { from: "recipient", to: "actor.user.name" } }
+        ]
       },
       {
-        format: ".*threatsInfoMap\".\\s.$threatsInfoMap.{parse=json}$"
-         rewrites: [
-          {
-             input:   "threatsInfoMap.classification", 
-             output:  "threat.classification",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatsInfoMap.threat", 
-             output:  "threat.name",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatsInfoMap.threatID", 
-             output:  "threat.id",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatsInfoMap.threatStatus", 
-             output:  "threat.status",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatsInfoMap.threatTime", 
-             output:  "threat.time",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatsInfoMap.threatType", 
-             output:  "threat.type",
-             match:   ".*",
-             replace: "$0"
-           },
-           {
-             input:   "threatsInfoMap.threatUrl", 
-             output:  "threat.url",
-             match:   ".*",
-             replace: "$0"
-           },
-         ]
+        predicate: "true",
+        transformations: [
+          { constant: { field: "dataSource.name", value: "Proofpoint" } },
+          { constant: { field: "event.type", value: "Other" } },
+
+        ]
       }
     ]
-  }
\ No newline at end of file
+  }
+}
\ No newline at end of file
diff --git a/Backend/scenarios/apollo_ransomware_scenario.py b/Backend/scenarios/apollo_ransomware_scenario.py
index 2564efd..fca6fb0 100644
--- a/Backend/scenarios/apollo_ransomware_scenario.py
+++ b/Backend/scenarios/apollo_ransomware_scenario.py
@@ -56,7 +56,7 @@
 
 # Attack Profile - correlates with existing OCSF alert data
 ATTACKER_PROFILE = {
-    "sender_email": "hr-updates@starfleet-benefits.com",
+    "sender_email": "hr-updates@starfleat.com",
     "sender_name": "Starfleet HR Benefits",
     "sender_ip": "45.33.32.156",
     "malicious_xlsx": "TestBook.xlsm",
@@ -367,6 +367,7 @@ def generate_proofpoint_phishing_delivery(base_time: datetime) -> List[Dict]:
     events = []
     
     delivery_time = get_scenario_time(base_time, 0)
+    delivery_time_dt = base_time  # Keep datetime for calculations
     
     pf_event = {
         "GUID": str(uuid.uuid4()),
@@ -407,6 +408,19 @@ def generate_proofpoint_phishing_delivery(base_time: datetime) -> List[Dict]:
         ],
         "toAddresses": [VICTIM_PROFILE["email"]],
         "xmailer": "Microsoft Outlook 16.0",
+        # Add unmapped fields and other required fields
+        "unmapped.classification": "malware",
+        "unmapped.recipient": VICTIM_PROFILE["email"],
+        "unmapped.sender": ATTACKER_PROFILE["sender_email"],
+        "url.url_string": f"https://threatinsight.proofpoint.com/#/threat_id/{uuid.uuid4()}",
+        "device.ip": ATTACKER_PROFILE["sender_ip"],
+        # Add click-related fields for parser detection
+        "clickIP": ATTACKER_PROFILE["sender_ip"],
+        "clickTime": (datetime.fromisoformat(delivery_time.replace('Z', '+00:00')) + timedelta(minutes=5)).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z",
+        "threatURL": f"https://threatinsight.proofpoint.com/#/threat_id/{uuid.uuid4()}",
+        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
+        "event.type": "Click",
+        "timestamp": delivery_time,
         "messageParts": [
             {
                 "disposition": "attached",