From fbfe53567c999cbbace4b8a2a1ae394436575a26 Mon Sep 17 00:00:00 2001
From: Stephen Shao
Date: Fri, 16 Jan 2026 16:46:08 -0500
Subject: [PATCH 1/2] Add smart repo detection to handle MAD, MAD-private,
 and MAD-internal in paths and automatically find repo boundaries

---
 src/madengine/utils/config_parser.py | 259 +++++++++++++++++++++++----
 1 file changed, 229 insertions(+), 30 deletions(-)

diff --git a/src/madengine/utils/config_parser.py b/src/madengine/utils/config_parser.py
index 7d3e31e7..ec988570 100644
--- a/src/madengine/utils/config_parser.py
+++ b/src/madengine/utils/config_parser.py
@@ -3,6 +3,10 @@
 This module provides utilities to parse configuration files from model arguments
 and load them in various formats (CSV, JSON, YAML).
 
+Handles multiple repository patterns:
+- Standalone repos (MAD, MAD-private): ./scripts/model/configs/
+- Submodule in MAD-internal: ./scripts/{MAD|MAD-private}/model/configs/
+
 Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
 """
 
@@ -29,25 +33,194 @@ class ConfigParser:
 
     This class handles parsing configuration files in various formats
     (CSV, JSON, YAML) that are referenced in model arguments.
+
+    Supports three usage patterns when run from MAD-internal CI:
+    1. MAD-internal models: ./scripts/model/configs/
+    2. MAD submodule: ./scripts/MAD/model/configs/
+    3. MAD-private submodule: ./scripts/MAD-private/model/configs/
+
+    Also works when run standalone in MAD or MAD-private repos.
     """
 
+    # Known repository/submodule names to detect
+    KNOWN_REPOS = ['MAD', 'MAD-private', 'MAD-internal']
+
     def __init__(self, scripts_base_dir: typing.Optional[str] = None):
         """Initialize ConfigParser.
 
         Args:
-            scripts_base_dir: Base directory for scripts (e.g., ~/amd/MAD-private/scripts)
+            scripts_base_dir: Base directory for scripts
+                (e.g., "scripts/MAD-private/pyt_atom")
         """
         self.scripts_base_dir = scripts_base_dir
+        self._path_cache = {}  # Cache resolved paths
+
+    def _extract_repo_root(self, path: str) -> typing.Optional[str]:
+        """Extract repository root from a scripts path.
+
+        Examples:
+            "scripts/MAD-private/pyt_atom" -> "scripts/MAD-private"
+            "scripts/MAD/vllm" -> "scripts/MAD"
+            "scripts/model" -> "scripts"
+
+        Args:
+            path: Full or partial path containing scripts directory
+
+        Returns:
+            Repository root path, or None if not identifiable
+        """
+        if not path:
+            return None
+
+        parts = Path(path).parts
+
+        # Find 'scripts' in the path
+        try:
+            scripts_idx = parts.index('scripts')
+        except ValueError:
+            return None
+
+        # Check if the next part after 'scripts' is a known repo name
+        if scripts_idx + 1 < len(parts):
+            next_part = parts[scripts_idx + 1]
+            if next_part in self.KNOWN_REPOS:
+                # It's a submodule: scripts/MAD-private or scripts/MAD
+                return os.path.join(*parts[:scripts_idx + 2])
+            else:
+                # It's MAD-internal's own models: scripts/model -> scripts
+                return os.path.join(*parts[:scripts_idx + 1])
+
+        # Just the 'scripts' directory
+        return os.path.join(*parts[:scripts_idx + 1])
+
+    def _build_candidate_paths(
+        self,
+        config_path: str,
+        model_scripts_path: typing.Optional[str] = None
+    ) -> typing.List[str]:
+        """Build list of candidate paths to try in priority order.
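+
+        Example (illustrative; assumes scripts_base_dir is unset): for
+        config_path "configs/default.csv" and a model script at
+        "scripts/MAD-private/pyt_atom/run.py", the first deduplicated
+        candidates are:
+            scripts/MAD-private/pyt_atom/configs/default.csv   (model directory)
+            scripts/MAD-private/configs/default.csv            (repo root)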
+
+        Args:
+            config_path: Relative config path (e.g., "configs/default.csv")
+            model_scripts_path: Path to model script file
+
+        Returns:
+            List of full paths to try, in order of priority
+        """
+        candidates = []
+
+        # Priority 1: Relative to model's immediate directory
+        # scripts/MAD-private/pyt_atom + configs/default.csv
+        if model_scripts_path:
+            scripts_dir = os.path.dirname(model_scripts_path)
+            if scripts_dir:
+                candidates.append(os.path.join(scripts_dir, config_path))
+
+        # Priority 2: Relative to scripts_base_dir
+        # scripts/MAD-private/pyt_atom + configs/default.csv
+        if self.scripts_base_dir:
+            candidates.append(os.path.join(self.scripts_base_dir, config_path))
+
+        # Priority 3: Relative to repository root (for shared configs)
+        # This handles: scripts/MAD-private/pyt_atom -> scripts/MAD-private/configs/
+        if self.scripts_base_dir:
+            repo_root = self._extract_repo_root(self.scripts_base_dir)
+            if repo_root:
+                candidates.append(os.path.join(repo_root, config_path))
+
+        if model_scripts_path:
+            scripts_dir = os.path.dirname(model_scripts_path)
+            if scripts_dir:
+                repo_root = self._extract_repo_root(scripts_dir)
+                if repo_root:
+                    candidates.append(os.path.join(repo_root, config_path))
+
+        # Priority 4: Walk up from model's directory
+        # Try parent directories up to repo root
+        if model_scripts_path:
+            scripts_dir = os.path.dirname(model_scripts_path)
+            repo_root = self._extract_repo_root(scripts_dir)
+            if repo_root and scripts_dir:
+                candidates.extend(
+                    self._walk_up_between(config_path, scripts_dir, repo_root)
+                )
+
+        # Priority 5: Walk up from scripts_base_dir
+        if self.scripts_base_dir:
+            repo_root = self._extract_repo_root(self.scripts_base_dir)
+            if repo_root:
+                candidates.extend(
+                    self._walk_up_between(config_path, self.scripts_base_dir, repo_root)
+                )
+
+        # Remove duplicates while preserving order
+        seen = set()
+        unique_candidates = []
+        for path in candidates:
+            normalized = os.path.normpath(path)
+            if normalized not in seen:
+                seen.add(normalized)
+                unique_candidates.append(normalized)
+
+        return unique_candidates
 
-    def parse_config_from_args(self, args_string: str, model_scripts_path: str = None) -> typing.Optional[str]:
-        """Extract config file path from model arguments.
+    def _walk_up_between(
+        self,
+        config_path: str,
+        start_dir: str,
+        stop_dir: str
+    ) -> typing.List[str]:
+        """Generate candidate paths by walking up from start to stop directory.
+
+        Args:
+            config_path: Relative config path
+            start_dir: Starting directory
+            stop_dir: Stop at this directory (inclusive)
+
+        Returns:
+            List of candidate paths
+        """
+        candidates = []
+        current = os.path.abspath(start_dir)
+        stop = os.path.abspath(stop_dir)
+
+        # Compare against stop with a trailing separator so a sibling that
+        # merely shares a name prefix (e.g., scripts/MAD vs. scripts/MAD-private)
+        # is never treated as an ancestor
+        while current == stop or current.startswith(stop + os.sep):
+            parent = os.path.dirname(current)
+            if parent == current:  # Reached filesystem root
+                break
+            current = parent
+            candidates.append(os.path.join(current, config_path))
+            if current == stop:  # Reached stop directory
+                break
+
+        return candidates
+
+    def parse_config_from_args(
+        self,
+        args_string: str,
+        model_scripts_path: typing.Optional[str] = None
+    ) -> typing.Optional[str]:
+        """Extract and resolve config file path from model arguments.
+
+        Resolution strategy:
+        1. If absolute path -> verify it exists
+        2. Try model's immediate directory
+        3. Try scripts_base_dir
+        4. Try repository root (scripts/MAD-private/, scripts/MAD/, scripts/)
+        5. 
+           Walk up from model directory to repo root
+
+        This handles all cases:
+        - MAD-internal models: scripts/model/configs/default.csv
+        - MAD submodule: scripts/MAD/model/configs/default.csv
+        - MAD-private submodule: scripts/MAD-private/model/configs/default.csv
+        - Shared configs at repo level: scripts/MAD-private/configs/default.csv
 
         Args:
             args_string: The args field from models.json
-            model_scripts_path: Path to the model's script directory
+            model_scripts_path: Path to the model's script file (e.g., run.py)
 
         Returns:
-            Full path to config file, or None if no config found
+            Full path to config file, or None if not found
         """
         if not args_string:
             return None
@@ -59,35 +232,56 @@ def parse_config_from_args(self, args_string: str, model_scripts_path: str = Non
 
         config_path = config_match.group(1)
 
-        # If it's already an absolute path, return it
+        # Check cache first
+        cache_key = f"{config_path}::{model_scripts_path}::{self.scripts_base_dir}"
+        if cache_key in self._path_cache:
+            cached_path = self._path_cache[cache_key]
+            if os.path.exists(cached_path):
+                return cached_path
+            else:
+                del self._path_cache[cache_key]
+
+        # Handle absolute paths
         if os.path.isabs(config_path):
-            return config_path if os.path.exists(config_path) else None
+            if os.path.exists(config_path):
+                self._path_cache[cache_key] = config_path
+                return config_path
+            else:
+                LOGGER.warning(f"Absolute config path does not exist: {config_path}")
+                return None
 
-        # Try to resolve relative path
-        # First, try relative to model scripts directory
-        if model_scripts_path:
-            scripts_dir = os.path.dirname(model_scripts_path)
-            full_path = os.path.join(scripts_dir, config_path)
-            if os.path.exists(full_path):
-                return full_path
+        # Build and try candidate paths
+        candidates = self._build_candidate_paths(config_path, model_scripts_path)
 
-        # Try relative to scripts_base_dir
-        if self.scripts_base_dir:
-            full_path = os.path.join(self.scripts_base_dir, config_path)
-            if os.path.exists(full_path):
-                return full_path
+        for candidate in candidates:
+            LOGGER.debug(f"Trying config path: {candidate}")
+            if os.path.exists(candidate):
+                LOGGER.info(f"Found config file at: {candidate}")
+                self._path_cache[cache_key] = candidate
+                return candidate
 
-        LOGGER.warning(f"Config file not found: {config_path}")
+        # Not found
+        LOGGER.warning(
+            f"Config file not found: {config_path}\n"
+            f"  model_scripts_path: {model_scripts_path}\n"
+            f"  scripts_base_dir: {self.scripts_base_dir}\n"
+            f"  Tried {len(candidates)} locations:\n"
+            + "\n".join(f"  - {c}" for c in candidates[:5])
+            + (f"\n  ... and {len(candidates)-5} more" if len(candidates) > 5 else "")
        )
         return None
 
-    def load_config_file(self, config_path: str) -> typing.Optional[typing.Union[typing.List[dict], dict]]:
+    def load_config_file(
+        self,
+        config_path: str
+    ) -> typing.Optional[typing.Union[typing.List[dict], dict]]:
         """Load and parse a configuration file.
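+
+        Example (illustrative path and values):
+            parser = ConfigParser(scripts_base_dir="scripts/MAD/vllm")
+            configs = parser.load_config_file("scripts/MAD/vllm/configs/default.csv")
+            # CSV -> list of dicts, e.g.
+            # [{"model": "meta-llama/Llama-3.1-8B-Instruct", "tp": 8}, ...]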
 
         Args:
             config_path: Full path to the config file
 
         Returns:
-            For CSV: List of dicts (one per row)
+            For CSV: List of dicts (one per row, excluding empty rows)
             For JSON/YAML: Dict or list as-is from file
             None if file cannot be loaded
         """
@@ -117,13 +311,23 @@ def _load_csv(self, config_path: str) -> typing.List[dict]:
             config_path: Path to CSV file
 
         Returns:
-            List of dicts, one per row
+            List of dicts, one per row (excluding completely empty rows)
         """
         df = pd.read_csv(config_path)
+
+        # Remove rows that are completely empty (all NaN)
+        # This handles blank lines in CSV files
+        df = df.dropna(how='all')
+
         # Convert NaN to None for JSON serialization
         df = df.where(pd.notnull(df), None)
+
         # Convert to list of dicts
-        return df.to_dict(orient='records')
+        configs = df.to_dict(orient='records')
+
+        LOGGER.info(f"Loaded {len(configs)} config entries from {config_path}")
+
+        return configs
 
     def _load_json(self, config_path: str) -> typing.Union[dict, list]:
         """Load JSON config file.
@@ -179,10 +383,6 @@ def match_config_to_result(
             return configs_list[0]
 
         # For multiple configs, try to match based on common fields
-        # Extract model identifier from result model name
-        # e.g., "pyt_vllm_llama-3.1-8b_perf_meta-llama_Llama-3.1-8B-Instruct"
-        # should match config with model="meta-llama/Llama-3.1-8B-Instruct"
-
         for config in configs_list:
             # Try to match on 'model' field if it exists in both
             if 'model' in config and 'model' in result_data:
@@ -212,7 +412,7 @@ def parse_and_load(
 
         Args:
             args_string: The args field from models.json
-            model_scripts_path: Path to the model's script directory
+            model_scripts_path: Path to the model's script file
 
         Returns:
             Config data (list of dicts for CSV, dict for JSON/YAML), or None
@@ -234,4 +434,3 @@ def get_config_parser(scripts_base_dir: typing.Optional[str] = None) -> ConfigPa
         ConfigParser instance
     """
     return ConfigParser(scripts_base_dir=scripts_base_dir)
-

From 3a4431741c50f51484905cf1e29bfe3bd6a14793 Mon Sep 17 00:00:00 2001
From: Stephen Shao
Date: Tue, 27 Jan 2026 13:31:54 -0600
Subject: [PATCH 2/2] Fix the superset reporting system and add a cumulative
 perf_super.json with CSV export

---
 src/madengine/tools/run_models.py        |  3 +
 src/madengine/tools/update_perf_csv.py   | 24 +++---
 src/madengine/tools/update_perf_super.py | 98 ++++++++++++++++++++++--
 3 files changed, 108 insertions(+), 17 deletions(-)

diff --git a/src/madengine/tools/run_models.py b/src/madengine/tools/run_models.py
index ec5a3c8d..3a16600c 100644
--- a/src/madengine/tools/run_models.py
+++ b/src/madengine/tools/run_models.py
@@ -1063,6 +1063,7 @@ def run_model(self, model_info: typing.Dict) -> bool:
                 update_perf_super_json(
                     exception_result="perf_entry_super.json",
                     perf_super_json="perf_entry_super.json",
+                    cumulative_json="perf_super.json",
                     scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
                 )
             else:
@@ -1174,6 +1175,7 @@ def run_model(self, model_info: typing.Dict) -> bool:
                         perf_super_json="perf_entry_super.json",
                         model_name=run_details.model,
                         common_info="common_info_super.json",
+                        cumulative_json="perf_super.json",
                         scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
                     )
                 else:
@@ -1188,6 +1190,7 @@ def run_model(self, model_info: typing.Dict) -> bool:
                 update_perf_super_json(
                     single_result="perf_entry_super.json",
                     perf_super_json="perf_entry_super.json",
+                    cumulative_json="perf_super.json",
                     scripts_base_dir=os.path.dirname(model_info.get("scripts", "")),
                 )
 
diff --git a/src/madengine/tools/update_perf_csv.py b/src/madengine/tools/update_perf_csv.py
index 6f8b84ee..2c99457a 100644
--- a/src/madengine/tools/update_perf_csv.py
+++ b/src/madengine/tools/update_perf_csv.py
@@ -128,12 +128,12 @@ def handle_multiple_results(
             model = r.pop("model")
             row["model"] = model_name + "_" + str(model)
 
-            # Only extract essential result columns for perf.csv
-            # The full details with all metrics are preserved in perf_entry_super.json
-            row["performance"] = r.get("performance")
-            row["metric"] = r.get("metric")
+            # Extract all columns from CSV result to ensure proper column alignment
+            # This ensures all result columns (benchmark, tp, inp, out, dtype, etc.) are captured
+            for key, value in r.items():
+                row[key] = value
 
-            if row["performance"] is not None and pd.notna(row["performance"]):
+            if row.get("performance") is not None and pd.notna(row.get("performance")):
                 row["status"] = "SUCCESS"
             else:
                 row["status"] = "FAILURE"
@@ -151,11 +151,15 @@ def handle_multiple_results(
         final_multiple_results_df = pd.concat(
             [final_multiple_results_df, pd.DataFrame(row_safe, index=[0])], ignore_index=True
         )
-    # Reorder columns according to existing perf csv
-    columns = perf_csv_df.columns.tolist()
-    # Add any additional columns to the end
-    columns = columns + [col for col in final_multiple_results_df.columns if col not in columns]
-    final_multiple_results_df = final_multiple_results_df[columns]
+
+    # Reorder columns according to the existing perf CSV (applied once, after the loop)
+    if not final_multiple_results_df.empty:
+        desired_columns = perf_csv_df.columns.tolist()
+        # Add any additional columns from final_multiple_results_df
+        desired_columns = desired_columns + [col for col in final_multiple_results_df.columns if col not in desired_columns]
+        # Only select columns that actually exist in final_multiple_results_df to avoid KeyError
+        available_columns = [col for col in desired_columns if col in final_multiple_results_df.columns]
+        final_multiple_results_df = final_multiple_results_df[available_columns]
 
     perf_entry_df_to_csv(final_multiple_results_df)
     if perf_csv_df.empty:
diff --git a/src/madengine/tools/update_perf_super.py b/src/madengine/tools/update_perf_super.py
index 23bb1a15..a22fa314 100644
--- a/src/madengine/tools/update_perf_super.py
+++ b/src/madengine/tools/update_perf_super.py
@@ -114,8 +114,9 @@ def handle_multiple_results_super(
             result_model = result_row.pop("model")
             record["model"] = f"{model_name}_{result_model}"
 
-            # Update with result data
-            record.update(result_row)
+            # Extract metadata fields that should be at top level
+            record["performance"] = result_row.get("performance")
+            record["metric"] = result_row.get("metric")
 
             # Set status based on performance
             if record.get("performance") is not None and pd.notna(record.get("performance")):
@@ -123,6 +124,10 @@ def handle_multiple_results_super(
             else:
                 record["status"] = "FAILURE"
 
+            # Store all result data in multi_results field (structured data)
+            # This captures additional metrics beyond the primary performance metric
+            record["multi_results"] = result_row
+
             # Match config to this specific result
             if configs_data:
                 if isinstance(configs_data, list):
@@ -198,22 +203,25 @@ def update_perf_super_json(
     common_info: typing.Optional[str] = None,
     model_name: typing.Optional[str] = None,
     scripts_base_dir: typing.Optional[str] = None,
+    cumulative_json: typing.Optional[str] = None,
 ) -> None:
     """Update the perf_entry_super.json file with the latest performance data.
 
     Args:
-        perf_super_json: Path to perf_entry_super.json file.
+        perf_super_json: Path to perf_entry_super.json file (latest run only).
         multiple_results: Path to multiple results CSV file.
        single_result: Path to single result JSON file.
         exception_result: Path to exception result JSON file.
         common_info: Path to common info JSON file.
         model_name: The model name.
         scripts_base_dir: Base directory for scripts (for config file resolution).
+        cumulative_json: Path to cumulative perf_super.json file (all runs). If provided,
+            results will be appended to this file in addition to perf_entry_super.json.
     """
     print(f"Updating perf_entry_super.json with enhanced performance data")
 
-    # Load existing perf_entry_super.json
-    perf_super_list = load_perf_super_json(perf_super_json)
+    # Start with an empty list so perf_entry_super.json holds only the latest run
+    perf_super_list = []
 
     # Create config parser
     config_parser = ConfigParser(scripts_base_dir=scripts_base_dir)
@@ -237,7 +245,83 @@ def update_perf_super_json(
         print("No results to update in perf_entry_super.json")
         return
 
-    # Write updated perf_entry_super.json
+    # Write latest run to perf_entry_super.json
     write_json(perf_super_list, perf_super_json)
-    print(f"Successfully updated {perf_super_json}")
+    print(f"Successfully updated {perf_super_json} (latest run)")
+
+    # Export latest run to CSV
+    export_perf_super_to_csv(perf_super_json)
+
+    # Update cumulative database if path provided
+    if cumulative_json:
+        cumulative_list = load_perf_super_json(cumulative_json)
+        cumulative_list.extend(perf_super_list)
+        write_json(cumulative_list, cumulative_json)
+        print(f"Successfully updated {cumulative_json} (cumulative: {len(cumulative_list)} total entries)")
+        export_perf_super_to_csv(cumulative_json)
+
+
+def export_perf_super_to_csv(perf_super_json: str) -> None:
+    """Export perf_entry_super.json to CSV format.
+
+    Flattens nested structures (multi_results, configs) and exports to CSV.
+
+    Args:
+        perf_super_json: Path to perf_entry_super.json file.
+    """
+    if not os.path.exists(perf_super_json):
+        print(f"Warning: {perf_super_json} does not exist. Skipping CSV export.")
+        return
+
+    try:
+        data = read_json(perf_super_json)
+
+        # Handle both single dict and list
+        if isinstance(data, dict):
+            data = [data]
+        elif not isinstance(data, list):
+            print(f"Warning: Unexpected data type in {perf_super_json}")
+            return
+
+        if len(data) == 0:
+            print(f"Warning: No data in {perf_super_json}")
+            return
+
+        # Flatten nested structures for CSV
+        flattened_data = []
+        for record in data:
+            flat_record = {}
+
+            for key, value in record.items():
+                if key == 'multi_results' and isinstance(value, dict):
+                    # Expand multi_results to top level with prefix
+                    for mr_key, mr_value in value.items():
+                        flat_record[f"mr_{mr_key}"] = mr_value
+                elif key == 'configs' and value is not None:
+                    # Convert configs to JSON string
+                    flat_record['configs'] = json.dumps(value)
+                elif isinstance(value, (list, dict)):
+                    # Convert other complex types to JSON strings (empty ones become None)
+                    flat_record[key] = json.dumps(value) if value else None
+                else:
+                    flat_record[key] = value
+
+            flattened_data.append(flat_record)
+
+        # Create DataFrame and export
+        df = pd.DataFrame(flattened_data)
+
+        # Reorder columns: put important ones first
+        priority_cols = ['model', 'status', 'performance', 'metric']
+        other_cols = [col for col in df.columns if col not in priority_cols]
+        ordered_cols = [col for col in priority_cols if col in df.columns] + other_cols
+        df = df[ordered_cols]
+
+        # Export to CSV, deriving the .csv name from the .json name
+        csv_filename = os.path.splitext(perf_super_json)[0] + '.csv'
+        df.to_csv(csv_filename, index=False)
+        print(f"Successfully exported {csv_filename}")
+
+    except Exception as e:
+        print(f"Error exporting {perf_super_json} to CSV: {e}")
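+
+
+# Illustrative invocation sketch (hypothetical file names; mirrors the call
+# sites added to run_models.py in this patch):
+#
+#   update_perf_super_json(
+#       multiple_results="perf_entry.csv",
+#       perf_super_json="perf_entry_super.json",
+#       model_name="pyt_vllm_llama-3.1-8b",
+#       common_info="common_info_super.json",
+#       cumulative_json="perf_super.json",
+#       scripts_base_dir="scripts/MAD-private/pyt_vllm_llama-3.1-8b",
+#   )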