From eccd71a51aa4be9d5d42511c5db50e6e1931257b Mon Sep 17 00:00:00 2001 From: jmorascalyr <42879226+jmorascalyr@users.noreply.github.com> Date: Fri, 20 Feb 2026 06:23:06 -0700 Subject: [PATCH 1/3] feat: Add trace ID support and M365 parser improvements for correlation scenarios (#70) - Added CorrelationRunRequest model with trace_id, tag_phase, and tag_trace fields for correlation scenario execution - Implemented /correlation/run endpoint to execute scenarios with SIEM context and trace ID tagging - Updated start_correlation_scenario() and _execute_correlation_scenario() to accept and pass trace_id via S1_TRACE_ID environment variable - Added tag_phase and tag_trace boolean flags with S1_TAG --- Backend/api/app/routers/scenarios.py | 62 +++++++++++++++++++ Backend/api/app/services/scenario_service.py | 44 +++++++++++-- .../microsoft_365_collaboration.json | 2 +- .../scenarios/apollo_ransomware_scenario.py | 23 ++++--- .../microsoft_365_collaboration.conf | 16 ++++- .../microsoft_365_collaboration.conf | 2 +- 6 files changed, 131 insertions(+), 18 deletions(-) diff --git a/Backend/api/app/routers/scenarios.py b/Backend/api/app/routers/scenarios.py index eed6dd3..8489a41 100644 --- a/Backend/api/app/routers/scenarios.py +++ b/Backend/api/app/routers/scenarios.py @@ -6,6 +6,7 @@ import asyncio import time import json +import sys from datetime import datetime, timedelta from pathlib import Path as PathLib @@ -29,6 +30,17 @@ class SIEMQueryRequest(BaseModel): end_time_hours: int = 0 # How far back to end (default now) anchor_configs: Optional[List[Dict[str, Any]]] = None + +class CorrelationRunRequest(BaseModel): + """Request model for correlation scenario execution""" + scenario_id: str + destination_id: str + siem_context: Optional[Dict[str, Any]] = None + trace_id: Optional[str] = None + tag_phase: bool = True + tag_trace: bool = True + workers: int = 10 + # Initialize scenario service scenario_service = ScenarioService() @@ -278,6 +290,56 @@ async def execute_siem_query( raise HTTPException(status_code=500, detail=str(e)) +@router.post("/correlation/run", response_model=BaseResponse) +async def run_correlation_scenario( + request: CorrelationRunRequest, + background_tasks: BackgroundTasks, + _: str = Depends(require_write_access) +): + """ + Execute a correlation scenario with SIEM context and trace ID support + """ + try: + # Validate scenario exists and supports correlation + scenarios_dir = PathLib(__file__).parent.parent.parent / "scenarios" + if str(scenarios_dir) not in sys.path: + sys.path.insert(0, str(scenarios_dir)) + + scenario_modules = { + "apollo_ransomware_scenario": "apollo_ransomware_scenario" + } + + if request.scenario_id not in scenario_modules: + raise HTTPException(status_code=404, detail=f"Correlation scenario '{request.scenario_id}' not found") + + # Start correlation scenario execution with trace ID + execution_id = await scenario_service.start_correlation_scenario( + scenario_id=request.scenario_id, + siem_context=request.siem_context or {}, + trace_id=request.trace_id, + tag_phase=request.tag_phase, + tag_trace=request.tag_trace, + background_tasks=background_tasks + ) + + return BaseResponse( + success=True, + data={ + "execution_id": execution_id, + "scenario_id": request.scenario_id, + "status": "started", + "trace_id": request.trace_id, + "tag_phase": request.tag_phase, + "tag_trace": request.tag_trace, + "started_at": datetime.utcnow().isoformat() + } + ) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + # ============================================================================= # GENERIC SCENARIO ENDPOINTS (catch-all /{scenario_id} routes must be last) # ============================================================================= diff --git a/Backend/api/app/services/scenario_service.py b/Backend/api/app/services/scenario_service.py index 9a40a2f..c8e3d07 100644 --- a/Backend/api/app/services/scenario_service.py +++ b/Backend/api/app/services/scenario_service.py @@ -5,12 +5,12 @@ import uuid import time import asyncio +import json from datetime import datetime import logging import os import sys import importlib -import json from pathlib import Path logger = logging.getLogger(__name__) @@ -264,11 +264,14 @@ async def start_correlation_scenario( self, scenario_id: str, siem_context: Dict[str, Any], + trace_id: Optional[str] = None, + tag_phase: bool = True, + tag_trace: bool = True, speed: str = "fast", dry_run: bool = False, background_tasks=None ) -> str: - """Start correlation scenario execution with SIEM context""" + """Start correlation scenario execution with SIEM context and trace ID support""" execution_id = str(uuid.uuid4()) self.running_scenarios[execution_id] = { @@ -279,16 +282,35 @@ async def start_correlation_scenario( "speed": speed, "dry_run": dry_run, "siem_context": siem_context, + "trace_id": trace_id, + "tag_phase": tag_phase, + "tag_trace": tag_trace, "progress": 0 } if background_tasks: - background_tasks.add_task(self._execute_correlation_scenario, execution_id, scenario_id, siem_context) + background_tasks.add_task( + self._execute_correlation_scenario, + execution_id, + scenario_id, + siem_context, + trace_id, + tag_phase, + tag_trace + ) return execution_id - async def _execute_correlation_scenario(self, execution_id: str, scenario_id: str, siem_context: Dict[str, Any]): - """Execute correlation scenario with SIEM context""" + async def _execute_correlation_scenario( + self, + execution_id: str, + scenario_id: str, + siem_context: Dict[str, Any], + trace_id: Optional[str] = None, + tag_phase: bool = True, + tag_trace: bool = True + ): + """Execute correlation scenario with SIEM context and trace ID support""" import sys import os from pathlib import Path @@ -303,6 +325,12 @@ async def _execute_correlation_scenario(self, execution_id: str, scenario_id: st siem_context_json = json.dumps(siem_context) os.environ['SIEM_CONTEXT'] = siem_context_json + # Set trace ID and tagging environment variables + if trace_id: + os.environ['S1_TRACE_ID'] = trace_id + os.environ['S1_TAG_PHASE'] = '1' if tag_phase else '0' + os.environ['S1_TAG_TRACE'] = '1' if tag_trace else '0' + # Import and run the scenario module = __import__(scenario_id) scenario_result = module.generate_apollo_ransomware_scenario(siem_context=siem_context) @@ -321,8 +349,12 @@ async def _execute_correlation_scenario(self, execution_id: str, scenario_id: st self.running_scenarios[execution_id]["error"] = str(e) self.running_scenarios[execution_id]["completed_at"] = datetime.utcnow().isoformat() finally: - # Clean up environment variable + # Clean up environment variables os.environ.pop('SIEM_CONTEXT', None) + if trace_id: + os.environ.pop('S1_TRACE_ID', None) + os.environ.pop('S1_TAG_PHASE', None) + os.environ.pop('S1_TAG_TRACE', None) async def _execute_scenario(self, execution_id: str, scenario: Dict[str, Any]): """Execute scenario in background""" diff --git a/Backend/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.json b/Backend/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.json index 29e2a89..2d5869a 100644 --- a/Backend/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.json +++ b/Backend/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.json @@ -1,6 +1,6 @@ { "attributes": { - "dataSource.name": "Microsoft 365 Collaboration", + "dataSource.name": "Microsoft O365", "dataSource.vendor": "Microsoft", "dataSource.category": "security", "metadata.product.name": "Microsoft 365 SharePoint/OneDrive", diff --git a/Backend/scenarios/apollo_ransomware_scenario.py b/Backend/scenarios/apollo_ransomware_scenario.py index fca6fb0..fcd0f56 100644 --- a/Backend/scenarios/apollo_ransomware_scenario.py +++ b/Backend/scenarios/apollo_ransomware_scenario.py @@ -88,7 +88,8 @@ "name": "Apollo Ransomware - STARFLEET Attack", "description": "Correlates Proofpoint and M365 events with existing EDR/WEL data for the Apollo ransomware attack chain targeting STARFLEET.", - "default_query": """dataSource.name in ('SentinelOne','Windows Event Logs') endpoint.name contains ("Enterprise", "bridge") + "default_query": """dataSource.name in ('SentinelOne','Windows Event Logs') endpoint.name contains ("Enterprise", "bridge") AND (winEventLog.id = 4698 or * contains "apollo") + | group newest_timestamp = newest(timestamp), oldest_timestamp = oldest(timestamp) by event.type, src.process.user, endpoint.name, src.endpoint.ip.address, dst.ip.address | sort newest_timestamp | columns event.type, src.process.user, endpoint.name, oldest_timestamp, newest_timestamp, src.endpoint.ip.address, dst.ip.address""", @@ -450,8 +451,8 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: """Generate M365 events for email access and attachment download""" events = [] - # Email accessed - 5 minutes after delivery (user checks email) - email_access_time = get_scenario_time(base_time, 5) + # Email accessed - 30 seconds after Proofpoint delivery + email_access_time = get_scenario_time(base_time, 0, 30) m365_email_access = microsoft_365_collaboration_log() m365_email_access['TimeStamp'] = email_access_time m365_email_access['UserId'] = VICTIM_PROFILE['email'] @@ -467,8 +468,8 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: m365_email_access['RequestedBy'] = VICTIM_PROFILE['name'] # -> actor.user.name events.append(create_event(email_access_time, "microsoft_365_collaboration", "email_interaction", m365_email_access)) - # Attachment preview/download - 6 minutes after delivery - attachment_time = get_scenario_time(base_time, 6) + # Attachment preview/download - 30 seconds after MailItemsAccessed (1 min after delivery) + attachment_time = get_scenario_time(base_time, 1) m365_attachment = microsoft_365_collaboration_log() m365_attachment['TimeStamp'] = attachment_time m365_attachment['UserId'] = VICTIM_PROFILE['email'] @@ -483,8 +484,8 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: m365_attachment['RequestedBy'] = VICTIM_PROFILE['name'] # -> actor.user.name events.append(create_event(attachment_time, "microsoft_365_collaboration", "email_interaction", m365_attachment)) - # File opened in Excel Online / locally - 7 minutes after delivery - file_open_time = get_scenario_time(base_time, 7) + # File opened in Excel Online / locally - 30 seconds after FileDownloaded (1 min 30 sec after delivery) + file_open_time = get_scenario_time(base_time, 1, 30) m365_file_open = microsoft_365_collaboration_log() m365_file_open['TimeStamp'] = file_open_time m365_file_open['UserId'] = VICTIM_PROFILE['email'] @@ -785,9 +786,15 @@ def send_to_hec(event_data: dict, event_type: str, trace_id: str = None, phase: product = type_to_product.get(event_type, event_type) + # Special handling for dataSource.name to match expected values + if event_type == "microsoft_365_collaboration": + data_source_name = "Microsoft O365" + else: + data_source_name = event_type.replace('_', ' ').title() + attr_fields = { "dataSource.vendor": event_type.split('_')[0].title() if '_' in event_type else event_type.title(), - "dataSource.name": event_type.replace('_', ' ').title(), + "dataSource.name": data_source_name, "dataSource.category": "security" } diff --git a/Backend/utilities/parsers/community_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf b/Backend/utilities/parsers/community_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf index 29e2a89..5158a5e 100644 --- a/Backend/utilities/parsers/community_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf +++ b/Backend/utilities/parsers/community_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf @@ -1,6 +1,6 @@ { "attributes": { - "dataSource.name": "Microsoft 365 Collaboration", + "dataSource.name": "Microsoft O365", "dataSource.vendor": "Microsoft", "dataSource.category": "security", "metadata.product.name": "Microsoft 365 SharePoint/OneDrive", @@ -93,11 +93,23 @@ } }, { - "rename": { + "copy": { "from": "unmapped.Operation", "to": "activity_name" } }, + { + "copy": { + "from": "unmapped.Operation", + "to": "unmapped.operation" + } + }, + { + "copy": { + "from": "unmapped.Operation", + "to": "event.type" + } + }, { "rename": { "from": "unmapped.SiteUrl", diff --git a/Backend/utilities/parsers/sentinelone_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf b/Backend/utilities/parsers/sentinelone_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf index 29e2a89..2d5869a 100644 --- a/Backend/utilities/parsers/sentinelone_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf +++ b/Backend/utilities/parsers/sentinelone_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf @@ -1,6 +1,6 @@ { "attributes": { - "dataSource.name": "Microsoft 365 Collaboration", + "dataSource.name": "Microsoft O365", "dataSource.vendor": "Microsoft", "dataSource.category": "security", "metadata.product.name": "Microsoft 365 SharePoint/OneDrive", From 22b3b253a6bb6269d11a473589730a1fc089d488 Mon Sep 17 00:00:00 2001 From: jmorascalyr <42879226+jmorascalyr@users.noreply.github.com> Date: Fri, 20 Feb 2026 06:41:09 -0700 Subject: [PATCH 2/3] feat: Add user.email_addr field to M365 collaboration events for actor email correlation - Added user.email_addr field to M365 email interaction events (MailItemsAccessed, FileDownloaded, FileAccessed) using VICTIM_PROFILE['email'] - Updated microsoft_365_collaboration parser to copy unmapped.user.email_addr to user.email_addr for OCSF actor.user.email_addr mapping - Enables correlation of M365 collaboration events with email security events via actor email address --- Backend/scenarios/apollo_ransomware_scenario.py | 3 +++ .../microsoft_365_collaboration.conf | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/Backend/scenarios/apollo_ransomware_scenario.py b/Backend/scenarios/apollo_ransomware_scenario.py index fcd0f56..fb7895b 100644 --- a/Backend/scenarios/apollo_ransomware_scenario.py +++ b/Backend/scenarios/apollo_ransomware_scenario.py @@ -466,6 +466,7 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: m365_email_access['SiteUrl'] = f"https://outlook.office365.com/mail/inbox" # Parser-mapped fields for OCSF synthetic columns m365_email_access['RequestedBy'] = VICTIM_PROFILE['name'] # -> actor.user.name + m365_email_access['user.email_addr'] = VICTIM_PROFILE['email'] # -> actor.user.email_addr events.append(create_event(email_access_time, "microsoft_365_collaboration", "email_interaction", m365_email_access)) # Attachment preview/download - 30 seconds after MailItemsAccessed (1 min after delivery) @@ -482,6 +483,7 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: m365_attachment['SiteUrl'] = f"https://outlook.office365.com/mail/inbox" # Parser-mapped fields for OCSF synthetic columns m365_attachment['RequestedBy'] = VICTIM_PROFILE['name'] # -> actor.user.name + m365_attachment['user.email_addr'] = VICTIM_PROFILE['email'] # -> actor.user.email_addr events.append(create_event(attachment_time, "microsoft_365_collaboration", "email_interaction", m365_attachment)) # File opened in Excel Online / locally - 30 seconds after FileDownloaded (1 min 30 sec after delivery) @@ -498,6 +500,7 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: m365_file_open['SiteUrl'] = f"https://starfleet-my.sharepoint.com/personal/{VICTIM_PROFILE['username']}" # Parser-mapped fields for OCSF synthetic columns m365_file_open['RequestedBy'] = VICTIM_PROFILE['name'] # -> actor.user.name + m365_file_open['user.email_addr'] = VICTIM_PROFILE['email'] # -> actor.user.email_addr events.append(create_event(file_open_time, "microsoft_365_collaboration", "file_access", m365_file_open)) return events diff --git a/Backend/utilities/parsers/community_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf b/Backend/utilities/parsers/community_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf index 5158a5e..a6932cd 100644 --- a/Backend/utilities/parsers/community_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf +++ b/Backend/utilities/parsers/community_new/ai-siem-main/parsers/community/microsoft_365_collaboration-latest/microsoft_365_collaboration.conf @@ -140,6 +140,12 @@ "to": "actor.user.name" } }, + { + "copy": { + "from": "unmapped.user.email_addr", + "to": "user.email_addr" + } + }, { "rename": { "from": "unmapped.Details", From 8d13ad555dac6d4cdeb5d65b1fa47c02c9788e8e Mon Sep 17 00:00:00 2001 From: jmorascalyr <42879226+jmorascalyr@users.noreply.github.com> Date: Fri, 20 Feb 2026 06:47:57 -0700 Subject: [PATCH 3/3] feat: Add object_id field to M365 email interaction events for mail items analysis - Added object_id field to MailItemsAccessed, FileDownloaded, and FileAccessed events with contextual paths (/Inbox/, /Attachments/, /Documents/) - Enables mail items analysis and tracking of malicious attachment flow through M365 collaboration events - Maps to OCSF object_id field for consistent object identification across email interaction phases --- Backend/scenarios/apollo_ransomware_scenario.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Backend/scenarios/apollo_ransomware_scenario.py b/Backend/scenarios/apollo_ransomware_scenario.py index fb7895b..cc2b46b 100644 --- a/Backend/scenarios/apollo_ransomware_scenario.py +++ b/Backend/scenarios/apollo_ransomware_scenario.py @@ -467,6 +467,7 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: # Parser-mapped fields for OCSF synthetic columns m365_email_access['RequestedBy'] = VICTIM_PROFILE['name'] # -> actor.user.name m365_email_access['user.email_addr'] = VICTIM_PROFILE['email'] # -> actor.user.email_addr + m365_email_access['object_id'] = f"/Inbox/{ATTACKER_PROFILE['malicious_xlsx']}" # -> object_id for mail items analysis events.append(create_event(email_access_time, "microsoft_365_collaboration", "email_interaction", m365_email_access)) # Attachment preview/download - 30 seconds after MailItemsAccessed (1 min after delivery) @@ -484,6 +485,7 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: # Parser-mapped fields for OCSF synthetic columns m365_attachment['RequestedBy'] = VICTIM_PROFILE['name'] # -> actor.user.name m365_attachment['user.email_addr'] = VICTIM_PROFILE['email'] # -> actor.user.email_addr + m365_attachment['object_id'] = f"/Attachments/{ATTACKER_PROFILE['malicious_xlsx']}" # -> object_id for mail items analysis events.append(create_event(attachment_time, "microsoft_365_collaboration", "email_interaction", m365_attachment)) # File opened in Excel Online / locally - 30 seconds after FileDownloaded (1 min 30 sec after delivery) @@ -501,6 +503,7 @@ def generate_m365_email_interaction(base_time: datetime) -> List[Dict]: # Parser-mapped fields for OCSF synthetic columns m365_file_open['RequestedBy'] = VICTIM_PROFILE['name'] # -> actor.user.name m365_file_open['user.email_addr'] = VICTIM_PROFILE['email'] # -> actor.user.email_addr + m365_file_open['object_id'] = f"/Documents/{ATTACKER_PROFILE['malicious_xlsx']}" # -> object_id for mail items analysis events.append(create_event(file_open_time, "microsoft_365_collaboration", "file_access", m365_file_open)) return events