From fbfe53567c999cbbace4b8a2a1ae394436575a26 Mon Sep 17 00:00:00 2001
From: Stephen Shao
Date: Fri, 16 Jan 2026 16:46:08 -0500
Subject: [PATCH 1/2] Add smart repo detection to handle MAD, MAD-private,
 and MAD-internal in paths and automatically find repo boundaries

---
 src/madengine/utils/config_parser.py | 259 +++++++++++++++++++++++----
 1 file changed, 229 insertions(+), 30 deletions(-)

diff --git a/src/madengine/utils/config_parser.py b/src/madengine/utils/config_parser.py
index 7d3e31e7..ec988570 100644
--- a/src/madengine/utils/config_parser.py
+++ b/src/madengine/utils/config_parser.py
@@ -3,6 +3,10 @@
 This module provides utilities to parse configuration files from model arguments
 and load them in various formats (CSV, JSON, YAML).
 
+Handles multiple repository patterns:
+- Standalone repos (MAD, MAD-private): ./scripts/model/configs/
+- Submodule in MAD-internal: ./scripts/{MAD|MAD-private}/model/configs/
+
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
@@ -29,25 +33,194 @@ class ConfigParser:
 
     This class handles parsing configuration files in various formats
     (CSV, JSON, YAML) that are referenced in model arguments.
+
+    Supports three usage patterns when run from MAD-internal CI:
+    1. MAD-internal models: ./scripts/model/configs/
+    2. MAD submodule: ./scripts/MAD/model/configs/
+    3. MAD-private submodule: ./scripts/MAD-private/model/configs/
+
+    Also works when run standalone in MAD or MAD-private repos.
     """
 
+    # Known repository/submodule names to detect
+    KNOWN_REPOS = ['MAD', 'MAD-private', 'MAD-internal']
+
     def __init__(self, scripts_base_dir: typing.Optional[str] = None):
         """Initialize ConfigParser.
 
         Args:
-            scripts_base_dir: Base directory for scripts (e.g., ~/amd/MAD-private/scripts)
+            scripts_base_dir: Base directory for scripts
+                (e.g., "scripts/MAD-private/pyt_atom")
         """
         self.scripts_base_dir = scripts_base_dir
+        self._path_cache = {}  # Cache resolved paths
+
+    def _extract_repo_root(self, path: str) -> typing.Optional[str]:
+        """Extract repository root from a scripts path.
+
+        Examples:
+            "scripts/MAD-private/pyt_atom" -> "scripts/MAD-private"
+            "scripts/MAD/vllm" -> "scripts/MAD"
+            "scripts/model" -> "scripts"
+
+        Args:
+            path: Full or partial path containing scripts directory
+
+        Returns:
+            Repository root path, or None if not identifiable
+        """
+        if not path:
+            return None
+
+        parts = Path(path).parts
+
+        # Find 'scripts' in the path
+        try:
+            scripts_idx = parts.index('scripts')
+        except ValueError:
+            return None
+
+        # Check if the next part after 'scripts' is a known repo name
+        if scripts_idx + 1 < len(parts):
+            next_part = parts[scripts_idx + 1]
+            if next_part in self.KNOWN_REPOS:
+                # It's a submodule: scripts/MAD-private or scripts/MAD
+                return os.path.join(*parts[:scripts_idx + 2])
+            else:
+                # It's MAD-internal's own models: scripts/model -> scripts
+                return os.path.join(*parts[:scripts_idx + 1])
+
+        # Just the 'scripts' directory
+        return os.path.join(*parts[:scripts_idx + 1])
+
+    def _build_candidate_paths(
+        self,
+        config_path: str,
+        model_scripts_path: typing.Optional[str] = None
+    ) -> typing.List[str]:
+        """Build list of candidate paths to try in priority order.
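+
+        Example (illustrative; assumes scripts_base_dir is unset): for
+        config_path "configs/default.csv" and a model script at
+        "scripts/MAD-private/pyt_atom/run.py", the first deduplicated
+        candidates are:
+            scripts/MAD-private/pyt_atom/configs/default.csv   (model directory)
+            scripts/MAD-private/configs/default.csv            (repo root)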
+
+        Args:
+            config_path: Relative config path (e.g., "configs/default.csv")
+            model_scripts_path: Path to model script file
+
+        Returns:
+            List of full paths to try, in order of priority
+        """
+        candidates = []
+
+        # Priority 1: Relative to model's immediate directory
+        # scripts/MAD-private/pyt_atom + configs/default.csv
+        if model_scripts_path:
+            scripts_dir = os.path.dirname(model_scripts_path)
+            if scripts_dir:
+                candidates.append(os.path.join(scripts_dir, config_path))
+
+        # Priority 2: Relative to scripts_base_dir
+        # scripts/MAD-private/pyt_atom + configs/default.csv
+        if self.scripts_base_dir:
+            candidates.append(os.path.join(self.scripts_base_dir, config_path))
+
+        # Priority 3: Relative to repository root (for shared configs)
+        # This handles: scripts/MAD-private/pyt_atom -> scripts/MAD-private/configs/
+        if self.scripts_base_dir:
+            repo_root = self._extract_repo_root(self.scripts_base_dir)
+            if repo_root:
+                candidates.append(os.path.join(repo_root, config_path))
+
+        if model_scripts_path:
+            scripts_dir = os.path.dirname(model_scripts_path)
+            if scripts_dir:
+                repo_root = self._extract_repo_root(scripts_dir)
+                if repo_root:
+                    candidates.append(os.path.join(repo_root, config_path))
+
+        # Priority 4: Walk up from model's directory
+        # Try parent directories up to repo root
+        if model_scripts_path:
+            scripts_dir = os.path.dirname(model_scripts_path)
+            repo_root = self._extract_repo_root(scripts_dir)
+            if repo_root and scripts_dir:
+                candidates.extend(
+                    self._walk_up_between(config_path, scripts_dir, repo_root)
+                )
+
+        # Priority 5: Walk up from scripts_base_dir
+        if self.scripts_base_dir:
+            repo_root = self._extract_repo_root(self.scripts_base_dir)
+            if repo_root:
+                candidates.extend(
+                    self._walk_up_between(config_path, self.scripts_base_dir, repo_root)
+                )
+
+        # Remove duplicates while preserving order
+        seen = set()
+        unique_candidates = []
+        for path in candidates:
+            normalized = os.path.normpath(path)
+            if normalized not in seen:
+                seen.add(normalized)
+                unique_candidates.append(normalized)
+
+        return unique_candidates
 
-    def parse_config_from_args(self, args_string: str, model_scripts_path: str = None) -> typing.Optional[str]:
-        """Extract config file path from model arguments.
+    def _walk_up_between(
+        self,
+        config_path: str,
+        start_dir: str,
+        stop_dir: str
+    ) -> typing.List[str]:
+        """Generate candidate paths by walking up from start to stop directory.
+
+        Args:
+            config_path: Relative config path
+            start_dir: Starting directory
+            stop_dir: Stop at this directory (inclusive)
+
+        Returns:
+            List of candidate paths
+        """
+        candidates = []
+        current = os.path.abspath(start_dir)
+        stop = os.path.abspath(stop_dir)
+
+        # Compare against stop with a trailing separator so a sibling that
+        # merely shares a name prefix (e.g., scripts/MAD vs. scripts/MAD-private)
+        # is never treated as an ancestor
+        while current == stop or current.startswith(stop + os.sep):
+            parent = os.path.dirname(current)
+            if parent == current:  # Reached filesystem root
+                break
+            current = parent
+            candidates.append(os.path.join(current, config_path))
+            if current == stop:  # Reached stop directory
+                break
+
+        return candidates
+
+    def parse_config_from_args(
+        self,
+        args_string: str,
+        model_scripts_path: typing.Optional[str] = None
+    ) -> typing.Optional[str]:
+        """Extract and resolve config file path from model arguments.
+
+        Resolution strategy:
+        1. If absolute path -> verify it exists
+        2. Try model's immediate directory
+        3. Try scripts_base_dir
+        4. Try repository root (scripts/MAD-private/, scripts/MAD/, scripts/)
+        5. 
+           Walk up from model directory to repo root
+
+        This handles all cases:
+        - MAD-internal models: scripts/model/configs/default.csv
+        - MAD submodule: scripts/MAD/model/configs/default.csv
+        - MAD-private submodule: scripts/MAD-private/model/configs/default.csv
+        - Shared configs at repo level: scripts/MAD-private/configs/default.csv
 
         Args:
             args_string: The args field from models.json
-            model_scripts_path: Path to the model's script directory
+            model_scripts_path: Path to the model's script file (e.g., run.py)
 
         Returns:
-            Full path to config file, or None if no config found
+            Full path to config file, or None if not found
         """
         if not args_string:
             return None
@@ -59,35 +232,56 @@ def parse_config_from_args(self, args_string: str, model_scripts_path: str = Non
 
         config_path = config_match.group(1)
 
-        # If it's already an absolute path, return it
+        # Check cache first
+        cache_key = f"{config_path}::{model_scripts_path}::{self.scripts_base_dir}"
+        if cache_key in self._path_cache:
+            cached_path = self._path_cache[cache_key]
+            if os.path.exists(cached_path):
+                return cached_path
+            else:
+                del self._path_cache[cache_key]
+
+        # Handle absolute paths
         if os.path.isabs(config_path):
-            return config_path if os.path.exists(config_path) else None
+            if os.path.exists(config_path):
+                self._path_cache[cache_key] = config_path
+                return config_path
+            else:
+                LOGGER.warning(f"Absolute config path does not exist: {config_path}")
+                return None
 
-        # Try to resolve relative path
-        # First, try relative to model scripts directory
-        if model_scripts_path:
-            scripts_dir = os.path.dirname(model_scripts_path)
-            full_path = os.path.join(scripts_dir, config_path)
-            if os.path.exists(full_path):
-                return full_path
+        # Build and try candidate paths
+        candidates = self._build_candidate_paths(config_path, model_scripts_path)
 
-        # Try relative to scripts_base_dir
-        if self.scripts_base_dir:
-            full_path = os.path.join(self.scripts_base_dir, config_path)
-            if os.path.exists(full_path):
-                return full_path
+        for candidate in candidates:
+            LOGGER.debug(f"Trying config path: {candidate}")
+            if os.path.exists(candidate):
+                LOGGER.info(f"Found config file at: {candidate}")
+                self._path_cache[cache_key] = candidate
+                return candidate
 
-        LOGGER.warning(f"Config file not found: {config_path}")
+        # Not found
+        LOGGER.warning(
+            f"Config file not found: {config_path}\n"
+            f"  model_scripts_path: {model_scripts_path}\n"
+            f"  scripts_base_dir: {self.scripts_base_dir}\n"
+            f"  Tried {len(candidates)} locations:\n"
+            + "\n".join(f"  - {c}" for c in candidates[:5])
+            + (f"\n  ... and {len(candidates)-5} more" if len(candidates) > 5 else "")
        )
         return None
 
-    def load_config_file(self, config_path: str) -> typing.Optional[typing.Union[typing.List[dict], dict]]:
+    def load_config_file(
+        self,
+        config_path: str
+    ) -> typing.Optional[typing.Union[typing.List[dict], dict]]:
         """Load and parse a configuration file.
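+
+        Example (illustrative path and values):
+            parser = ConfigParser(scripts_base_dir="scripts/MAD/vllm")
+            configs = parser.load_config_file("scripts/MAD/vllm/configs/default.csv")
+            # CSV -> list of dicts, e.g.
+            # [{"model": "meta-llama/Llama-3.1-8B-Instruct", "tp": 8}, ...]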
 
         Args:
             config_path: Full path to the config file
 
         Returns:
-            For CSV: List of dicts (one per row)
+            For CSV: List of dicts (one per row, excluding empty rows)
             For JSON/YAML: Dict or list as-is from file
             None if file cannot be loaded
         """
@@ -117,13 +311,23 @@ def _load_csv(self, config_path: str) -> typing.List[dict]:
             config_path: Path to CSV file
 
         Returns:
-            List of dicts, one per row
+            List of dicts, one per row (excluding completely empty rows)
         """
         df = pd.read_csv(config_path)
+
+        # Remove rows that are completely empty (all NaN)
+        # This handles blank lines in CSV files
+        df = df.dropna(how='all')
+
         # Convert NaN to None for JSON serialization
         df = df.where(pd.notnull(df), None)
+
         # Convert to list of dicts
-        return df.to_dict(orient='records')
+        configs = df.to_dict(orient='records')
+
+        LOGGER.info(f"Loaded {len(configs)} config entries from {config_path}")
+
+        return configs
 
     def _load_json(self, config_path: str) -> typing.Union[dict, list]:
         """Load JSON config file.
@@ -179,10 +383,6 @@ def match_config_to_result(
             return configs_list[0]
 
         # For multiple configs, try to match based on common fields
-        # Extract model identifier from result model name
-        # e.g., "pyt_vllm_llama-3.1-8b_perf_meta-llama_Llama-3.1-8B-Instruct"
-        # should match config with model="meta-llama/Llama-3.1-8B-Instruct"
-
         for config in configs_list:
             # Try to match on 'model' field if it exists in both
             if 'model' in config and 'model' in result_data:
@@ -212,7 +412,7 @@ def parse_and_load(
 
         Args:
             args_string: The args field from models.json
-            model_scripts_path: Path to the model's script directory
+            model_scripts_path: Path to the model's script file
 
         Returns:
             Config data (list of dicts for CSV, dict for JSON/YAML), or None
@@ -234,4 +434,3 @@ def get_config_parser(scripts_base_dir: typing.Optional[str] = None) -> ConfigPa
         ConfigParser instance
     """
     return ConfigParser(scripts_base_dir=scripts_base_dir)
-

From 3a4431741c50f51484905cf1e29bfe3bd6a14793 Mon Sep 17 00:00:00 2001
From: Stephen Shao
Date: Tue, 27 Jan 2026 13:31:54 -0600
Subject: [PATCH 2/2] Fix the superset reporting system and add a cumulative
 perf_super.json with CSV export

---
 src/madengine/tools/run_models.py        |  3 +
 src/madengine/tools/update_perf_csv.py   | 24 +++---
 src/madengine/tools/update_perf_super.py | 98 ++++++++++++++++++++++--
 3 files changed, 108 insertions(+), 17 deletions(-)

diff --git a/src/madengine/tools/run_models.py b/src/madengine/tools/run_models.py
index ec5a3c8d..3a16600c 100644
--- a/src/madengine/tools/run_models.py
+++ b/src/madengine/tools/run_models.py
@@ -1063,6 +1063,7 @@ def run_model(self, model_info: typing.Dict) -> bool:
                 update_perf_super_json(
                     exception_result="perf_entry_super.json",
                     perf_super_json="perf_entry_super.json",
+                    cumulative_json="perf_super.json",
                     scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
                 )
             else:
@@ -1174,6 +1175,7 @@ def run_model(self, model_info: typing.Dict) -> bool:
                         perf_super_json="perf_entry_super.json",
                         model_name=run_details.model,
                         common_info="common_info_super.json",
+                        cumulative_json="perf_super.json",
                         scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
                     )
                 else:
@@ -1188,6 +1190,7 @@ def run_model(self, model_info: typing.Dict) -> bool:
                 update_perf_super_json(
                     single_result="perf_entry_super.json",
                     perf_super_json="perf_entry_super.json",
+                    cumulative_json="perf_super.json",
                     scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
                 )
 
diff --git a/src/madengine/tools/update_perf_csv.py b/src/madengine/tools/update_perf_csv.py
index 6f8b84ee..2c99457a 100644
--- a/src/madengine/tools/update_perf_csv.py
+++ b/src/madengine/tools/update_perf_csv.py
@@ -128,12 +128,12 @@ def handle_multiple_results(
             model = r.pop("model")
             row["model"] = model_name + "_" + str(model)
 
-            # Only extract essential result columns for perf.csv
-            # The full details with all metrics are preserved in perf_entry_super.json
-            row["performance"] = r.get("performance")
-            row["metric"] = r.get("metric")
+            # Extract all columns from CSV result to ensure proper column alignment
+            # This ensures all result columns (benchmark, tp, inp, out, dtype, etc.) are captured
+            for key, value in r.items():
+                row[key] = value
 
-            if row["performance"] is not None and pd.notna(row["performance"]):
+            if row.get("performance") is not None and pd.notna(row.get("performance")):
                 row["status"] = "SUCCESS"
             else:
                 row["status"] = "FAILURE"
@@ -151,11 +151,15 @@ def handle_multiple_results(
         final_multiple_results_df = pd.concat(
             [final_multiple_results_df, pd.DataFrame(row_safe, index=[0])], ignore_index=True
         )
-    # Reorder columns according to existing perf csv
-    columns = perf_csv_df.columns.tolist()
-    # Add any additional columns to the end
-    columns = columns + [col for col in final_multiple_results_df.columns if col not in columns]
-    final_multiple_results_df = final_multiple_results_df[columns]
+
+    # Reorder columns according to the existing perf CSV (applied once, after the loop)
+    if not final_multiple_results_df.empty:
+        desired_columns = perf_csv_df.columns.tolist()
+        # Add any additional columns from final_multiple_results_df
+        desired_columns = desired_columns + [col for col in final_multiple_results_df.columns if col not in desired_columns]
+        # Only select columns that actually exist in final_multiple_results_df to avoid KeyError
+        available_columns = [col for col in desired_columns if col in final_multiple_results_df.columns]
+        final_multiple_results_df = final_multiple_results_df[available_columns]
 
     perf_entry_df_to_csv(final_multiple_results_df)
     if perf_csv_df.empty:
diff --git a/src/madengine/tools/update_perf_super.py b/src/madengine/tools/update_perf_super.py
index 23bb1a15..a22fa314 100644
--- a/src/madengine/tools/update_perf_super.py
+++ b/src/madengine/tools/update_perf_super.py
@@ -114,8 +114,9 @@ def handle_multiple_results_super(
             result_model = result_row.pop("model")
             record["model"] = f"{model_name}_{result_model}"
 
-            # Update with result data
-            record.update(result_row)
+            # Extract metadata fields that should be at top level
+            record["performance"] = result_row.get("performance")
+            record["metric"] = result_row.get("metric")
 
             # Set status based on performance
             if record.get("performance") is not None and pd.notna(record.get("performance")):
@@ -123,6 +124,10 @@ def handle_multiple_results_super(
             else:
                 record["status"] = "FAILURE"
 
+            # Store all result data in multi_results field (structured data)
+            # This captures additional metrics beyond the primary performance metric
+            record["multi_results"] = result_row
+
             # Match config to this specific result
             if configs_data:
                 if isinstance(configs_data, list):
@@ -198,22 +203,25 @@ def update_perf_super_json(
     common_info: typing.Optional[str] = None,
     model_name: typing.Optional[str] = None,
     scripts_base_dir: typing.Optional[str] = None,
+    cumulative_json: typing.Optional[str] = None,
 ) -> None:
     """Update the perf_entry_super.json file with the latest performance data.
 
     Args:
-        perf_super_json: Path to perf_entry_super.json file.
+        perf_super_json: Path to perf_entry_super.json file (latest run only).
         multiple_results: Path to multiple results CSV file.
        single_result: Path to single result JSON file.
         exception_result: Path to exception result JSON file.
         common_info: Path to common info JSON file.
         model_name: The model name.
         scripts_base_dir: Base directory for scripts (for config file resolution).
+        cumulative_json: Path to cumulative perf_super.json file (all runs). If provided,
+            results will be appended to this file in addition to perf_entry_super.json.
     """
     print(f"Updating perf_entry_super.json with enhanced performance data")
 
-    # Load existing perf_entry_super.json
-    perf_super_list = load_perf_super_json(perf_super_json)
+    # Start with an empty list so perf_entry_super.json holds only the latest run
+    perf_super_list = []
 
     # Create config parser
     config_parser = ConfigParser(scripts_base_dir=scripts_base_dir)
@@ -237,7 +245,83 @@ def update_perf_super_json(
         print("No results to update in perf_entry_super.json")
         return
 
-    # Write updated perf_entry_super.json
+    # Write latest run to perf_entry_super.json
     write_json(perf_super_list, perf_super_json)
-    print(f"Successfully updated {perf_super_json}")
+    print(f"Successfully updated {perf_super_json} (latest run)")
+
+    # Export latest run to CSV
+    export_perf_super_to_csv(perf_super_json)
+
+    # Update cumulative database if path provided
+    if cumulative_json:
+        cumulative_list = load_perf_super_json(cumulative_json)
+        cumulative_list.extend(perf_super_list)
+        write_json(cumulative_list, cumulative_json)
+        print(f"Successfully updated {cumulative_json} (cumulative: {len(cumulative_list)} total entries)")
+        export_perf_super_to_csv(cumulative_json)
+
+
+def export_perf_super_to_csv(perf_super_json: str) -> None:
+    """Export perf_entry_super.json to CSV format.
+
+    Flattens nested structures (multi_results, configs) and exports to CSV.
+
+    Args:
+        perf_super_json: Path to perf_entry_super.json file.
+    """
+    if not os.path.exists(perf_super_json):
+        print(f"Warning: {perf_super_json} does not exist. Skipping CSV export.")
+        return
+
+    try:
+        data = read_json(perf_super_json)
+
+        # Handle both single dict and list
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            print(f"Warning: Unexpected data type in {perf_super_json}")
+            return
+
+        if len(data) == 0:
+            print(f"Warning: No data in {perf_super_json}")
+            return
+
+        # Flatten nested structures for CSV
+        flattened_data = []
+        for record in data:
+            flat_record = {}
+
+            for key, value in record.items():
+                if key == 'multi_results' and isinstance(value, dict):
+                    # Expand multi_results to top level with prefix
+                    for mr_key, mr_value in value.items():
+                        flat_record[f"mr_{mr_key}"] = mr_value
+                elif key == 'configs' and value is not None:
+                    # Convert configs to JSON string
+                    flat_record['configs'] = json.dumps(value)
+                elif isinstance(value, (list, dict)):
+                    # Convert other complex types to JSON strings (empty ones become None)
+                    flat_record[key] = json.dumps(value) if value else None
+                else:
+                    flat_record[key] = value
+
+            flattened_data.append(flat_record)
+
+        # Create DataFrame and export
+        df = pd.DataFrame(flattened_data)
+
+        # Reorder columns: put important ones first
+        priority_cols = ['model', 'status', 'performance', 'metric']
+        other_cols = [col for col in df.columns if col not in priority_cols]
+        ordered_cols = [col for col in priority_cols if col in df.columns] + other_cols
+        df = df[ordered_cols]
+
+        # Export to CSV, deriving the .csv name from the .json name
+        csv_filename = os.path.splitext(perf_super_json)[0] + '.csv'
+        df.to_csv(csv_filename, index=False)
+        print(f"Successfully exported {csv_filename}")
+
+    except Exception as e:
+        print(f"Error exporting {perf_super_json} to CSV: {e}")
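+
+
+# Illustrative invocation sketch (hypothetical file names; mirrors the call
+# sites added to run_models.py in this patch):
+#
+#   update_perf_super_json(
+#       multiple_results="perf_entry.csv",
+#       perf_super_json="perf_entry_super.json",
+#       model_name="pyt_vllm_llama-3.1-8b",
+#       common_info="common_info_super.json",
+#       cumulative_json="perf_super.json",
+#       scripts_base_dir="scripts/MAD-private/pyt_vllm_llama-3.1-8b",
+#   )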