# Provenance: additions from commit d0d8f9d2ee18633f0235cdba5c8daca6662f82a2
# Author: dapopov-st, Mon, 21 Apr 2025 08:47:22 -0400
# Subject: [PATCH] Add PEFT adapter format conversion support and fix yaml
#          file extension
# Target: llmfoundry/command_utils/eval.py (appended after eval_from_yaml)


def _retarget_weight_format(value: object, old_fmt: str, new_fmt: str) -> object:
    """Return ``value`` with weight format ``old_fmt`` swapped for ``new_fmt``.

    Only a trailing ``.<old_fmt>`` extension, or a value that is exactly
    ``old_fmt``, is rewritten. A blind substring replace would corrupt
    unrelated text (for example ``"robin"`` contains ``"bin"``) and would not
    round-trip between conversion and restoration.

    Args:
        value: A config value; anything that is not a string is returned as is.
        old_fmt: Format name without the leading dot, e.g. ``"safetensors"``.
        new_fmt: Replacement format name without the leading dot.

    Returns:
        The rewritten string, or ``value`` unchanged when it does not apply.
    """
    if not isinstance(value, str):
        return value
    if value == old_fmt:
        return new_fmt
    if value.endswith("." + old_fmt):
        return value[:-len(old_fmt)] + new_fmt
    return value


def _rewrite_adapter_config(config_path: str, old_fmt: str, new_fmt: str) -> bool:
    """Point ``weight_map`` entries and ``model_type`` at ``new_fmt``.

    The file is rewritten only when something changed, and the new content is
    written to a sibling temp file and swapped in with ``os.replace`` so a
    failure mid-write cannot leave a truncated config behind.

    Args:
        config_path: Path to ``adapter_config.json``.
        old_fmt: Format name to look for, without the leading dot.
        new_fmt: Format name to write instead, without the leading dot.

    Returns:
        True if the config file was modified, False otherwise (including when
        the file does not exist).

    Raises:
        OSError: If the file cannot be read or written.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    import json
    import os

    if not os.path.exists(config_path):
        return False

    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    if not isinstance(config, dict):
        return False

    changed = False

    weight_map = config.get("weight_map")
    if isinstance(weight_map, dict):
        for key, current in weight_map.items():
            updated = _retarget_weight_format(current, old_fmt, new_fmt)
            if updated != current:
                # Replacing the value of an existing key is safe while iterating.
                weight_map[key] = updated
                changed = True

    current_type = config.get("model_type")
    updated_type = _retarget_weight_format(current_type, old_fmt, new_fmt)
    if updated_type != current_type:
        config["model_type"] = updated_type
        changed = True

    if not changed:
        return False

    tmp_path = f"{config_path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)
        os.replace(tmp_path, config_path)
    finally:
        # Only still present when the dump or the swap failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return True


def convert_peft_adapter_format(model_dir: str) -> None:
    """Convert a PEFT adapter from safetensors to .bin format.

    Intended to avoid device metadata issues when loading the adapter. The
    function performs three operations:
    1. Converts the adapter weights from safetensors to PyTorch .bin format
       (skipped when adapter_model.bin already exists).
    2. Renames the original safetensors file to .safetensors.bak.
    3. Updates adapter_config.json to reference .bin instead of .safetensors.

    This is best-effort: any exception is caught and reported on stdout, so
    the caller is never interrupted by a failed conversion.

    Args:
        model_dir: Full path to the model directory containing PEFT adapter
            files. This should be the directory containing:
            - adapter_config.json
            - adapter_model.safetensors
            Example: '/model-checkpoints/llama3-1b-lora-20250420_180800'

    Returns:
        None

    Side Effects:
        - Creates adapter_model.bin in model_dir
        - Renames adapter_model.safetensors to adapter_model.safetensors.bak
        - Modifies adapter_config.json to reference .bin files
    """
    import os

    adapter_path = os.path.join(model_dir, "adapter_model.safetensors")
    bin_adapter_path = os.path.join(model_dir, "adapter_model.bin")
    backup_path = os.path.join(model_dir, "adapter_model.safetensors.bak")
    config_path = os.path.join(model_dir, "adapter_config.json")

    try:
        if os.path.exists(adapter_path) and not os.path.exists(bin_adapter_path):
            # Imported lazily so the heavy dependencies are only needed when a
            # conversion actually happens.
            import torch
            from safetensors.torch import load_file

            # Load onto CPU explicitly, independent of the saving device.
            weights = load_file(adapter_path, device="cpu")

            # Save to a temp name first: a half-written adapter_model.bin would
            # otherwise be mistaken for a finished conversion on the next call.
            tmp_bin_path = f"{bin_adapter_path}.tmp"
            try:
                torch.save(weights, tmp_bin_path)
                os.replace(tmp_bin_path, bin_adapter_path)
            finally:
                if os.path.exists(tmp_bin_path):
                    os.remove(tmp_bin_path)
            print(f"Converted adapter to .bin format: {bin_adapter_path}")

        # Move the safetensors file out of the way to force bin usage.
        if os.path.exists(adapter_path):
            # os.replace overwrites a stale backup on every platform.
            os.replace(adapter_path, backup_path)
            print(f"Moved safetensors file to {backup_path} to force bin usage")

        if _rewrite_adapter_config(config_path, "safetensors", "bin"):
            print("Updated adapter config to use .bin format")
    except Exception as e:
        # Deliberate best-effort boundary: report and continue.
        print(f"Failed to convert adapter format: {e}")


def restore_safetensors_after_eval(model_dir: str) -> None:
    """Restore safetensors files to their original state after evaluation.

    This function reverses the changes made by convert_peft_adapter_format():
    1. Restores adapter_model.safetensors from the .bak file if it exists.
    2. Updates adapter_config.json to reference .safetensors again.
    3. Keeps the .bin file in place for potential future use.

    Nothing is touched, including the config, when no backup file is present.
    Unlike the conversion step, errors are not caught here.

    Args:
        model_dir: Full path to the model directory containing PEFT adapter
            files. This should be the directory containing:
            - adapter_config.json
            - adapter_model.bin
            - adapter_model.safetensors.bak (created by
              convert_peft_adapter_format)
            Example: '/model-checkpoints/llama3-1b-lora-20250420_180800'

    Returns:
        None

    Raises:
        OSError: If renaming the backup or rewriting the config fails.
        json.JSONDecodeError: If adapter_config.json is not valid JSON.

    Side Effects:
        - Restores adapter_model.safetensors from the .bak file if it exists
        - Modifies adapter_config.json to reference .safetensors files
        - Keeps adapter_model.bin for potential future use
    """
    import os

    backup_path = os.path.join(model_dir, "adapter_model.safetensors.bak")
    adapter_path = os.path.join(model_dir, "adapter_model.safetensors")
    config_path = os.path.join(model_dir, "adapter_config.json")

    if not os.path.exists(backup_path):
        print(f"No backup found at {backup_path}, nothing to restore")
        return

    if os.path.exists(adapter_path):
        # Never clobber a live safetensors file with the backup.
        print(f"Safetensors file already exists at {adapter_path}, skipping restore")
    else:
        os.rename(backup_path, adapter_path)
        print("Restored safetensors file from backup")

    if _rewrite_adapter_config(config_path, "bin", "safetensors"):
        print("Updated adapter config to use safetensors format")