diff --git a/SWEBENCH_MODULE_README.md b/SWEBENCH_MODULE_README.md
new file mode 100644
index 0000000..bb39657
--- /dev/null
+++ b/SWEBENCH_MODULE_README.md
@@ -0,0 +1,166 @@
+# TProfiler SWE-bench 性能分析和评测模块
+
+## 概述
+
+SWE-bench模块是TProfiler的一个扩展功能，用于评测AI模型在软件工程任务上的性能表现。该模块实现了对AI模型解决真实GitHub issues能力的自动化评测。
+
+## 主要功能
+
+1. **任务管理**
+   - 支持从GitHub加载真实的软件工程任务
+   - 支持自定义任务创建
+   - 任务难度分级和分类
+
+2. **模型评测**
+   - 支持多种主流AI模型（GPT-4、Claude、Llama等）
+   - 自动调用模型API生成解决方案
+   - 在Docker容器中安全执行代码
+
+3. **性能分析**
+   - 执行时间统计
+   - 资源使用监控（CPU、内存）
+   - API调用和Token使用统计
+   - 成本估算
+
+4. **测试验证**
+   - 自动应用生成的补丁
+   - 运行项目测试套件
+   - 解析测试结果
+
+5. **报告生成**
+   - 多格式报告（文本、HTML、JSON、CSV）
+   - 详细的性能指标
+   - 可视化结果展示
+
+## 使用方法
+
+### 1. 配置
+
+编辑 `swebench.properties` 文件：
+
+```properties
+# 基本配置
+swebench.parallel.tasks=4
+swebench.task.timeout=30
+swebench.max.retry=3
+
+# 模型API配置
+swebench.model.api.url=https://api.openai.com/v1/completions
+swebench.model.api.key=your-api-key-here
+swebench.model.max.tokens=4096
+
+# Docker配置
+swebench.docker.image=swebench/eval:latest
+
+# 数据集类型：sample, csv, json（当前实现中 full、lite、verified 等其他取值均回退为内置示例任务）
+swebench.dataset.type=lite
+```
+
+### 2. 启动评测
+
+#### 命令行模式
+
+```bash
+# 开始评测
+./swebench-client start GPT-4
+
+# 停止评测
+./swebench-client stop
+
+# 查看状态
+./swebench-client status
+
+# 列出支持的模型
+./swebench-client list
+
+# 查看帮助
+./swebench-client help
+```
+
+#### 交互模式
+
+直接运行 `./swebench-client` 进入交互式菜单。
+
+### 3. 查看结果
+
+评测完成后，报告会保存在配置的报告路径下（默认为 `~/swebench-reports`）：
+
+- `swebench_<model>_<timestamp>.txt` - 文本报告
+- `swebench_<model>_<timestamp>.html` - HTML报告（可在浏览器中查看）
+- `swebench_<model>_<timestamp>.json` - JSON格式（便于程序处理）
+- `swebench_<model>_<timestamp>.csv` - CSV格式（可导入Excel）
+- `swebench_summary.txt` - 汇总报告
+
+## 架构设计
+
+```
+com.taobao.profile.swebench/
+├── SWEBenchManager.java       # 核心管理器
+├── SWEBenchConfig.java        # 配置管理
+├── task/
+│   ├── SWEBenchTask.java      # 任务定义
+│   └── TaskResult.java        # 任务结果
+├── evaluator/
+│   ├── ModelEvaluator.java    # 模型评估器
+│   ├── DockerEnvironment.java # Docker环境管理
+│   ├── ModelInterface.java    # 模型接口
+│   └── TestExecutor.java      # 测试执行器
+├── loader/
+│   └── TaskLoader.java        # 任务加载器（示例 / CSV / JSON）
+├── reporter/
+│   └── BenchmarkReporter.java # 报告生成器
+└── client/
+    └── SWEBenchClient.java    # 客户端程序
+```
+
+## 性能指标
+
+评测报告包含以下关键指标：
+
+1. **成功率**：成功解决的任务占比
+2. **执行时间**：每个任务的执行耗时
+3. **测试通过率**：应用生成的补丁后，通过的测试用例占全部测试用例的比例
+4. **资源使用**：CPU、内存使用情况
+5. **API调用**：模型API调用次数
+6. **Token使用**：总Token消耗量
+7. **成本估算**：基于Token使用的成本
+
+## 集成TProfiler
+
+SWE-bench模块与TProfiler深度集成：
+
+1. 使用TProfiler的性能分析功能监控评测过程
+2. 利用TProfiler的线程分析追踪并发任务执行
+3. 通过TProfiler的慢查询分析优化Docker操作
+
+## 扩展性
+
+该模块设计为易于扩展：
+
+1. **添加新模型**：实现 `ModelInterface` 接口
+2. **自定义任务源**：扩展任务加载逻辑
+3. **新的报告格式**：在 `BenchmarkReporter` 中添加新方法
+4. **测试框架支持**：扩展 `TestExecutor` 的解析逻辑
+
+## 依赖要求
+
+- Java 8+（代码使用了 try-with-resources、字符串 switch 以及 `String.join`）
+- Docker
+- 网络连接（用于调用模型API和下载GitHub仓库）
+
+## 注意事项
+
+1. 确保Docker已正确安装和配置
+2. 模型API密钥请妥善保管
+3. 评测过程可能耗时较长，建议在服务器上运行
+4. 注意API调用成本，合理设置并行任务数
+
+## 未来计划
+
+1. 支持更多编程语言（目前主要支持Python）
+2. 增加更多模型支持
+3. 实现分布式评测
+4. 添加实时监控界面
+5. 支持自定义评测指标
+
+## 贡献
+
+欢迎提交Issue和Pull Request来改进这个模块！
\ No newline at end of file
diff --git a/pkg/TProfiler/bin/swebench-client b/pkg/TProfiler/bin/swebench-client
new file mode 100644
index 0000000..6ec656b
--- /dev/null
+++ b/pkg/TProfiler/bin/swebench-client
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Launcher for the SWE-bench client.
+# JAVACMD and CLASS_PATH are presumably exported by common-env - TODO confirm.
+
+# Quote the path so an install directory containing spaces still works.
+. "$(dirname "$0")/common-env"
+
+MAIN_CLASS=com.taobao.profile.swebench.client.SWEBenchClient
+
+# Quote the classpath so it is passed to the JVM as a single argument.
+exec "$JAVACMD" -classpath "$CLASS_PATH" "$MAIN_CLASS" "$@"
\ No newline at end of file
diff --git a/pkg/TProfiler/bin/swebench-client.bat b/pkg/TProfiler/bin/swebench-client.bat
new file mode 100644
index 0000000..920db6c
--- /dev/null
+++ b/pkg/TProfiler/bin/swebench-client.bat
@@ -0,0 +1,2 @@
+@echo off
+rem Launches the SWE-bench client main class through startup.bat.
+rem NOTE(review): arguments given to this script (%*) are not forwarded on the
+rem line below - confirm whether startup.bat accepts and passes on extra arguments.
+call startup.bat com.taobao.profile.swebench.client.SWEBenchClient
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/Manager.java b/src/main/java/com/taobao/profile/Manager.java
index 4644152..8a935f7 100644
--- a/src/main/java/com/taobao/profile/Manager.java
+++ b/src/main/java/com/taobao/profile/Manager.java
@@ -47,6 +47,18 @@ public class Manager {
 	 * 远程刷出方法数据
 	 */
 	public static final String FLUSHMETHOD = "flushmethod";
+	/**
+	 * 启动SWE-bench评测
+	 */
+	public static final String SWEBENCH_START = "swebench_start";
+	/**
+	 * 停止SWE-bench评测
+	 */
+	public static final String SWEBENCH_STOP = "swebench_stop";
+	/**
+	 * 查询SWE-bench状态
+	 */
+	public static final String SWEBENCH_STATUS = "swebench_status";
 	/**
 	 * 是否用纳秒采集
 	 */
diff --git a/src/main/java/com/taobao/profile/swebench/SWEBenchConfig.java b/src/main/java/com/taobao/profile/swebench/SWEBenchConfig.java
new file mode 100644
index 0000000..072473b
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/SWEBenchConfig.java
@@ -0,0 +1,241 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * SWE-bench评测配置
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class SWEBenchConfig {
+    
+    /**
+     * 默认配置文件名
+     */
+    private static final String CONFIG_FILE = "swebench.properties";
+    
+    /**
+     * 并行任务数
+     */
+    private int parallelTaskCount = 4;
+    
+    /**
+     * 单个任务超时时间（分钟）
+     */
+    private int taskTimeoutMinutes = 30;
+    
+    /**
+     * 最大重试次数
+     */
+    private int maxRetryCount = 3;
+    
+    /**
+     * 报告输出路径
+     */
+    private String reportPath = System.getProperty("user.home") + "/swebench-reports";
+    
+    /**
+     * 任务数据路径
+     */
+    private String taskDataPath = System.getProperty("user.home") + "/swebench-tasks";
+    
+    /**
+     * 是否启用性能分析
+     */
+    private boolean enableProfiling = true;
+    
+    /**
+     * 是否保存中间结果
+     */
+    private boolean saveIntermediateResults = true;
+    
+    /**
+     * Docker镜像名称
+     */
+    private String dockerImage = "swebench/eval:latest";
+    
+    /**
+     * 评测数据集类型
+     */
+    private String datasetType = "lite"; // full, lite, verified
+    
+    /**
+     * 模型API配置
+     */
+    private String modelApiUrl;
+    private String modelApiKey;
+    private int modelMaxTokens = 4096;
+    
+    public SWEBenchConfig() {
+        loadConfig();
+    }
+    
+    /**
+     * Loads settings from the first usable {@code swebench.properties} file.
+     * <p>
+     * Candidates are probed in order: the working directory, then
+     * {@code ~/.tprofiler/}, then {@code conf/}. The first file that loads and
+     * parses successfully wins. When no candidate can be used, the defaults
+     * assigned in the field initializers stay in effect.
+     */
+    private void loadConfig() {
+        File[] candidates = {
+            new File(CONFIG_FILE),
+            new File(System.getProperty("user.home") + "/.tprofiler/" + CONFIG_FILE),
+            new File("conf/" + CONFIG_FILE)
+        };
+
+        for (File candidate : candidates) {
+            if (!candidate.exists()) {
+                continue;
+            }
+            // Fresh Properties per candidate so keys from a file that failed
+            // half-way through cannot leak into the next candidate.
+            Properties props = new Properties();
+            try (FileReader reader = new FileReader(candidate)) {
+                props.load(reader);
+                parseProperties(props);
+                System.out.println("加载SWE-bench配置文件: " + candidate.getAbsolutePath());
+                return;
+            } catch (IOException | IllegalArgumentException e) {
+                // IllegalArgumentException covers malformed escapes reported by
+                // Properties.load and invalid numbers (NumberFormatException);
+                // a bad file must not abort construction of the configuration.
+                System.err.println("无法加载SWE-bench配置文件: " + candidate.getAbsolutePath());
+                e.printStackTrace();
+            }
+        }
+
+        // No candidate could be used; keep the built-in defaults.
+        System.out.println("未找到SWE-bench配置文件，使用默认配置");
+    }
+    
+    /**
+     * Copies the recognised keys of {@code props} into this configuration.
+     * <p>
+     * Numeric keys that are missing, not a number, or below their minimum fall
+     * back to the documented default instead of throwing, so a typo in the
+     * properties file cannot prevent the configuration from being created.
+     *
+     * @param props properties read from {@code swebench.properties}
+     */
+    private void parseProperties(Properties props) {
+        // Basic settings. The thread count must be at least 1 because it is
+        // later used as the size of a fixed thread pool.
+        parallelTaskCount = readInt(props, "swebench.parallel.tasks", 4, 1);
+        taskTimeoutMinutes = readInt(props, "swebench.task.timeout", 30, 0);
+        maxRetryCount = readInt(props, "swebench.max.retry", 3, 0);
+
+        // Paths: keep the current value when the key is absent
+        reportPath = props.getProperty("swebench.report.path", reportPath);
+        taskDataPath = props.getProperty("swebench.task.path", taskDataPath);
+
+        // Feature switches
+        enableProfiling = Boolean.parseBoolean(props.getProperty("swebench.enable.profiling", "true"));
+        saveIntermediateResults = Boolean.parseBoolean(props.getProperty("swebench.save.intermediate", "true"));
+
+        // Docker
+        dockerImage = props.getProperty("swebench.docker.image", dockerImage);
+
+        // Dataset
+        datasetType = props.getProperty("swebench.dataset.type", "lite");
+
+        // Model API
+        modelApiUrl = props.getProperty("swebench.model.api.url");
+        modelApiKey = props.getProperty("swebench.model.api.key");
+        modelMaxTokens = readInt(props, "swebench.model.max.tokens", 4096, 0);
+    }
+
+    /**
+     * Reads an integer property, falling back when it is absent or invalid.
+     *
+     * @param props    source properties
+     * @param key      property name
+     * @param fallback value returned when the key is missing or unusable
+     * @param minimum  smallest accepted value (inclusive)
+     * @return the parsed value, or {@code fallback}
+     */
+    private static int readInt(Properties props, String key, int fallback, int minimum) {
+        String raw = props.getProperty(key);
+        if (raw == null) {
+            return fallback;
+        }
+        try {
+            int value = Integer.parseInt(raw.trim());
+            if (value >= minimum) {
+                return value;
+            }
+        } catch (NumberFormatException ignored) {
+            // reported below together with out-of-range values
+        }
+        System.err.println("配置项 " + key + " 的值无效: " + raw + "，使用默认值 " + fallback);
+        return fallback;
+    }
+    
+    // Getters and setters
+    
+    public int getParallelTaskCount() {
+        return parallelTaskCount;
+    }
+    
+    public void setParallelTaskCount(int parallelTaskCount) {
+        this.parallelTaskCount = parallelTaskCount;
+    }
+    
+    public int getTaskTimeoutMinutes() {
+        return taskTimeoutMinutes;
+    }
+    
+    public void setTaskTimeoutMinutes(int taskTimeoutMinutes) {
+        this.taskTimeoutMinutes = taskTimeoutMinutes;
+    }
+    
+    public int getMaxRetryCount() {
+        return maxRetryCount;
+    }
+    
+    public void setMaxRetryCount(int maxRetryCount) {
+        this.maxRetryCount = maxRetryCount;
+    }
+    
+    public String getReportPath() {
+        return reportPath;
+    }
+    
+    public void setReportPath(String reportPath) {
+        this.reportPath = reportPath;
+    }
+    
+    public String getTaskDataPath() {
+        return taskDataPath;
+    }
+    
+    public void setTaskDataPath(String taskDataPath) {
+        this.taskDataPath = taskDataPath;
+    }
+    
+    public boolean isEnableProfiling() {
+        return enableProfiling;
+    }
+    
+    public void setEnableProfiling(boolean enableProfiling) {
+        this.enableProfiling = enableProfiling;
+    }
+    
+    public boolean isSaveIntermediateResults() {
+        return saveIntermediateResults;
+    }
+    
+    public void setSaveIntermediateResults(boolean saveIntermediateResults) {
+        this.saveIntermediateResults = saveIntermediateResults;
+    }
+    
+    public String getDockerImage() {
+        return dockerImage;
+    }
+    
+    public void setDockerImage(String dockerImage) {
+        this.dockerImage = dockerImage;
+    }
+    
+    public String getDatasetType() {
+        return datasetType;
+    }
+    
+    public void setDatasetType(String datasetType) {
+        this.datasetType = datasetType;
+    }
+    
+    public String getModelApiUrl() {
+        return modelApiUrl;
+    }
+    
+    public void setModelApiUrl(String modelApiUrl) {
+        this.modelApiUrl = modelApiUrl;
+    }
+    
+    public String getModelApiKey() {
+        return modelApiKey;
+    }
+    
+    public void setModelApiKey(String modelApiKey) {
+        this.modelApiKey = modelApiKey;
+    }
+    
+    public int getModelMaxTokens() {
+        return modelMaxTokens;
+    }
+    
+    public void setModelMaxTokens(int modelMaxTokens) {
+        this.modelMaxTokens = modelMaxTokens;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/SWEBenchManager.java b/src/main/java/com/taobao/profile/swebench/SWEBenchManager.java
new file mode 100644
index 0000000..5faf412
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/SWEBenchManager.java
@@ -0,0 +1,234 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+import com.taobao.profile.Manager;
+import com.taobao.profile.swebench.task.SWEBenchTask;
+import com.taobao.profile.swebench.task.TaskResult;
+import com.taobao.profile.swebench.evaluator.ModelEvaluator;
+import com.taobao.profile.swebench.reporter.BenchmarkReporter;
+import com.taobao.profile.swebench.loader.TaskLoader;
+
+/**
+ * SWE-bench评测管理器
+ * 负责协调AI模型在软件工程任务上的性能评测
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class SWEBenchManager {
+    
+    private static SWEBenchManager instance = new SWEBenchManager();
+    
+    /**
+     * 线程池用于并行执行评测任务
+     */
+    private ExecutorService executorService;
+    
+    /**
+     * 任务列表
+     */
+    private List<SWEBenchTask> tasks;
+    
+    /**
+     * 模型评估器
+     */
+    private ModelEvaluator evaluator;
+    
+    /**
+     * 报告生成器
+     */
+    private BenchmarkReporter reporter;
+    
+    /**
+     * 是否正在运行
+     */
+    private volatile boolean isRunning = false;
+    
+    /**
+     * 评测配置
+     */
+    private SWEBenchConfig config;
+    
+    private SWEBenchManager() {
+        this.tasks = new ArrayList<>();
+        this.config = new SWEBenchConfig();
+    }
+    
+    /**
+     * 获取单例实例
+     */
+    public static SWEBenchManager getInstance() {
+        return instance;
+    }
+    
+    /**
+     * Prepares the evaluation environment: thread pool, evaluator, reporter
+     * and the task list.
+     * <p>
+     * May be called more than once; a pool left over from an earlier call is
+     * shut down first so repeated initialisation does not accumulate pools.
+     * NOTE(review): {@code loadTasks()} clears the task list, so tasks added
+     * through {@code addTask} before this call are discarded - confirm that is
+     * intended.
+     */
+    public void initialize() {
+        if (Manager.instance().isDebugMode()) {
+            System.out.println("初始化SWE-bench评测环境...");
+        }
+
+        // Release the pool created by a previous initialize() call
+        if (executorService != null && !executorService.isShutdown()) {
+            executorService.shutdownNow();
+        }
+
+        // Create the thread pool
+        int threadCount = config.getParallelTaskCount();
+        executorService = Executors.newFixedThreadPool(threadCount);
+
+        // Create the evaluator and the reporter
+        evaluator = new ModelEvaluator(config);
+        reporter = new BenchmarkReporter(config);
+
+        // Load the tasks
+        loadTasks();
+    }
+    
+    /**
+     * Replaces the task list with the tasks selected by the configured
+     * dataset type.
+     * <p>
+     * {@code "csv"} and {@code "json"} read {@code swebench_tasks.csv} /
+     * {@code swebench_tasks.json} from the task data path; every other value,
+     * including {@code "sample"}, loads the sample tasks. If loading throws,
+     * the sample tasks are added as a fallback.
+     */
+    private void loadTasks() {
+        tasks.clear();
+
+        try {
+            String type = config.getDatasetType();
+
+            if ("csv".equals(type)) {
+                // CSV file under the task data directory
+                tasks.addAll(TaskLoader.loadFromCsv(config.getTaskDataPath() + "/swebench_tasks.csv"));
+            } else if ("json".equals(type)) {
+                // JSON file under the task data directory
+                tasks.addAll(TaskLoader.loadFromJson(config.getTaskDataPath() + "/swebench_tasks.json"));
+            } else {
+                // "sample" and any unrecognised value
+                tasks.addAll(TaskLoader.loadSampleTasks());
+            }
+
+            if (!Manager.instance().isDebugMode()) {
+                return;
+            }
+            System.out.println("成功加载SWE-bench任务，任务数: " + tasks.size());
+            for (SWEBenchTask loaded : tasks) {
+                System.out.println("  - " + loaded.getTaskId() + ": " + loaded.getIssueTitle());
+            }
+        } catch (Exception e) {
+            System.err.println("加载任务失败: " + e.getMessage());
+            e.printStackTrace();
+            // Fall back to the sample tasks when loading fails
+            tasks.addAll(TaskLoader.loadSampleTasks());
+        }
+    }
+    
+    /**
+     * Runs every loaded task against the given model and writes the report.
+     * <p>
+     * Tasks are evaluated one after another on the calling thread. The running
+     * flag is checked before each task, so a concurrent {@link #stopBenchmark()}
+     * ends the run after the task in progress; the report then covers the
+     * tasks completed so far.
+     *
+     * @param modelName name of the model to evaluate
+     * @return {@code true} when the run finished and the report was generated;
+     *         {@code false} when a run is already active, the manager was not
+     *         initialised, or an exception occurred
+     */
+    public boolean startBenchmark(String modelName) {
+        // Check-and-set must be atomic, otherwise two callers can both start.
+        synchronized (this) {
+            if (isRunning) {
+                System.err.println("评测已在运行中");
+                return false;
+            }
+            if (evaluator == null || reporter == null) {
+                System.err.println("评测环境尚未初始化，请先调用initialize()");
+                return false;
+            }
+            isRunning = true;
+        }
+
+        System.out.println("开始SWE-bench评测，模型: " + modelName);
+
+        // Remember when the run started
+        long startTime = System.currentTimeMillis();
+
+        List<TaskResult> results = new ArrayList<>();
+
+        try {
+            // Iterate over a snapshot so addTask() during a run cannot cause a
+            // ConcurrentModificationException.
+            for (SWEBenchTask task : new ArrayList<>(tasks)) {
+                if (!isRunning) {
+                    // stopBenchmark() was called from another thread
+                    System.out.println("评测已被停止，已完成任务数: " + results.size());
+                    break;
+                }
+
+                TaskResult result = evaluator.evaluateTask(task, modelName);
+                results.add(result);
+
+                // Progress output
+                if (Manager.instance().isDebugMode()) {
+                    System.out.println("完成任务: " + task.getTaskId() +
+                                     ", 成功: " + result.isSuccess());
+                }
+            }
+
+            // Generate the report
+            reporter.generateReport(modelName, results, startTime);
+
+        } catch (Exception e) {
+            e.printStackTrace();
+            return false;
+        } finally {
+            isRunning = false;
+        }
+
+        return true;
+    }
+    
+    /**
+     * Requests the current run to stop and shuts the thread pool down.
+     * <p>
+     * Does nothing when no run is active. Waits up to 30 seconds for the pool
+     * to terminate; if the waiting thread is interrupted, the interrupt flag
+     * is restored so callers can still observe it.
+     */
+    public void stopBenchmark() {
+        if (!isRunning) {
+            return;
+        }
+
+        System.out.println("停止SWE-bench评测...");
+        isRunning = false;
+
+        if (executorService == null) {
+            return;
+        }
+
+        executorService.shutdownNow();
+        try {
+            executorService.awaitTermination(30, TimeUnit.SECONDS);
+        } catch (InterruptedException e) {
+            // Preserve the interrupt instead of swallowing it
+            Thread.currentThread().interrupt();
+        }
+    }
+    
+    /**
+     * 获取评测状态
+     */
+    public String getStatus() {
+        return isRunning ? "运行中" : "已停止";
+    }
+    
+    /**
+     * 添加自定义任务
+     */
+    public void addTask(SWEBenchTask task) {
+        tasks.add(task);
+    }
+    
+    /**
+     * 获取配置
+     */
+    public SWEBenchConfig getConfig() {
+        return config;
+    }
+    
+    /**
+     * 清理资源
+     */
+    public void shutdown() {
+        stopBenchmark();
+        if (executorService != null && !executorService.isShutdown()) {
+            executorService.shutdown();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/client/SWEBenchClient.java b/src/main/java/com/taobao/profile/swebench/client/SWEBenchClient.java
new file mode 100644
index 0000000..45b7c1f
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/client/SWEBenchClient.java
@@ -0,0 +1,319 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.client;
+
+import java.io.*;
+import java.net.Socket;
+import java.util.Scanner;
+
+import com.taobao.profile.swebench.SWEBenchManager;
+import com.taobao.profile.swebench.task.SWEBenchTask;
+
+/**
+ * SWE-bench客户端
+ * 用于启动和管理SWE-bench评测
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class SWEBenchClient {
+    
+    private static final String VERSION = "1.0.0";
+    
+    public static void main(String[] args) {
+        SWEBenchClient client = new SWEBenchClient();
+        
+        if (args.length == 0) {
+            client.interactiveMode();
+        } else {
+            client.commandMode(args);
+        }
+    }
+    
+    /**
+     * Executes the single command given on the command line.
+     * <p>
+     * The command name is matched case-insensitively. Unknown commands and a
+     * {@code start} without a model name exit the JVM with status 1.
+     *
+     * @param args command-line arguments; {@code args[0]} is the command
+     */
+    private void commandMode(String[] args) {
+        // Locale.ROOT keeps the match independent of the default locale
+        // (e.g. "STATUS" lower-cases differently under a Turkish locale).
+        String command = args[0].toLowerCase(java.util.Locale.ROOT);
+
+        switch (command) {
+            case "start":
+                if (args.length < 2) {
+                    System.err.println("用法: swebench-client start <model-name>");
+                    System.exit(1);
+                }
+                startBenchmark(args[1]);
+                break;
+
+            case "stop":
+                stopBenchmark();
+                break;
+
+            case "status":
+                getStatus();
+                break;
+
+            case "help":
+            case "-h":
+            case "--help":
+                printHelp();
+                break;
+
+            case "version":
+            case "-v":
+            case "--version":
+                System.out.println("SWE-bench Client " + VERSION);
+                break;
+
+            case "list":
+                listModels();
+                break;
+
+            case "config":
+                showConfig();
+                break;
+
+            default:
+                System.err.println("未知命令: " + command);
+                System.err.println("使用 'swebench-client help' 查看帮助");
+                System.exit(1);
+        }
+    }
+    
+    /**
+     * Runs the interactive menu loop on standard input.
+     * <p>
+     * The loop ends when the user chooses to quit (the JVM exits with status 0)
+     * or when standard input reaches end-of-file, e.g. on Ctrl-D or a closed
+     * pipe, in which case the method returns normally.
+     */
+    private void interactiveMode() {
+        Scanner scanner = new Scanner(System.in);
+
+        System.out.println("=====================================");
+        System.out.println("SWE-bench 评测客户端 v" + VERSION);
+        System.out.println("=====================================");
+        System.out.println();
+
+        printMenu();
+
+        while (true) {
+            System.out.print("\n请选择操作: ");
+            String input = readTrimmedLine(scanner);
+            if (input == null) {
+                // End of input: leave quietly instead of failing with NoSuchElementException
+                System.out.println();
+                System.out.println("退出程序");
+                return;
+            }
+
+            switch (input) {
+                case "1":
+                    System.out.print("请输入模型名称: ");
+                    String modelName = readTrimmedLine(scanner);
+                    if (modelName == null || modelName.isEmpty()) {
+                        System.out.println("模型名称不能为空");
+                    } else {
+                        startBenchmark(modelName);
+                    }
+                    break;
+
+                case "2":
+                    stopBenchmark();
+                    break;
+
+                case "3":
+                    getStatus();
+                    break;
+
+                case "4":
+                    listModels();
+                    break;
+
+                case "5":
+                    showConfig();
+                    break;
+
+                case "6":
+                    addCustomTask(scanner);
+                    break;
+
+                case "0":
+                case "q":
+                case "quit":
+                case "exit":
+                    System.out.println("退出程序");
+                    System.exit(0);
+                    break;
+
+                default:
+                    System.out.println("无效的选择，请重试");
+            }
+
+            printMenu();
+        }
+    }
+
+    /**
+     * Reads the next input line with surrounding whitespace removed.
+     *
+     * @param scanner scanner to read from
+     * @return the trimmed line, or {@code null} when no more input is available
+     */
+    private static String readTrimmedLine(Scanner scanner) {
+        return scanner.hasNextLine() ? scanner.nextLine().trim() : null;
+    }
+    
+    /**
+     * 打印菜单
+     */
+    private void printMenu() {
+        System.out.println("\n----- 菜单 -----");
+        System.out.println("1. 开始评测");
+        System.out.println("2. 停止评测");
+        System.out.println("3. 查看状态");
+        System.out.println("4. 列出支持的模型");
+        System.out.println("5. 查看配置");
+        System.out.println("6. 添加自定义任务");
+        System.out.println("0. 退出");
+        System.out.println("----------------");
+    }
+    
+    /**
+     * 开始评测
+     */
+    private void startBenchmark(String modelName) {
+        try {
+            System.out.println("正在启动SWE-bench评测...");
+            System.out.println("模型: " + modelName);
+            
+            // 初始化评测管理器
+            SWEBenchManager manager = SWEBenchManager.getInstance();
+            manager.initialize();
+            
+            // 启动评测
+            boolean success = manager.startBenchmark(modelName);
+            
+            if (success) {
+                System.out.println("评测已完成");
+            } else {
+                System.err.println("评测失败");
+            }
+            
+        } catch (Exception e) {
+            System.err.println("启动评测时出错: " + e.getMessage());
+            e.printStackTrace();
+        }
+    }
+    
+    /**
+     * 停止评测
+     */
+    private void stopBenchmark() {
+        try {
+            System.out.println("正在停止评测...");
+            
+            SWEBenchManager manager = SWEBenchManager.getInstance();
+            manager.stopBenchmark();
+            
+            System.out.println("评测已停止");
+            
+        } catch (Exception e) {
+            System.err.println("停止评测时出错: " + e.getMessage());
+        }
+    }
+    
+    /**
+     * 获取状态
+     */
+    private void getStatus() {
+        try {
+            SWEBenchManager manager = SWEBenchManager.getInstance();
+            String status = manager.getStatus();
+            
+            System.out.println("当前状态: " + status);
+            
+        } catch (Exception e) {
+            System.err.println("获取状态时出错: " + e.getMessage());
+        }
+    }
+    
+    /**
+     * 列出支持的模型
+     */
+    private void listModels() {
+        System.out.println("\n支持的模型:");
+        System.out.println("- GPT-4");
+        System.out.println("- GPT-3.5-turbo");
+        System.out.println("- Claude-2");
+        System.out.println("- Claude-instant");
+        System.out.println("- Llama-2-70b");
+        System.out.println("- CodeLlama-34b");
+        System.out.println("- StarCoder");
+        System.out.println("- Custom (需要配置API)");
+    }
+    
+    /**
+     * Prints the current configuration values.
+     * <p>
+     * The manager creates its configuration in its constructor, so it can be
+     * read without calling {@code initialize()}; that call would also create a
+     * thread pool and reload the task list just to display settings.
+     */
+    private void showConfig() {
+        try {
+            SWEBenchManager manager = SWEBenchManager.getInstance();
+
+            System.out.println("\n当前配置:");
+            System.out.println("并行任务数: " + manager.getConfig().getParallelTaskCount());
+            System.out.println("任务超时: " + manager.getConfig().getTaskTimeoutMinutes() + " 分钟");
+            System.out.println("最大重试: " + manager.getConfig().getMaxRetryCount() + " 次");
+            System.out.println("报告路径: " + manager.getConfig().getReportPath());
+            System.out.println("数据集类型: " + manager.getConfig().getDatasetType());
+            System.out.println("Docker镜像: " + manager.getConfig().getDockerImage());
+            System.out.println("启用性能分析: " + manager.getConfig().isEnableProfiling());
+
+        } catch (Exception e) {
+            System.err.println("显示配置时出错: " + e.getMessage());
+        }
+    }
+    
+    /**
+     * 添加自定义任务
+     */
+    private void addCustomTask(Scanner scanner) {
+        System.out.println("\n添加自定义任务:");
+        
+        try {
+            System.out.print("任务ID: ");
+            String taskId = scanner.nextLine().trim();
+            
+            System.out.print("仓库所有者: ");
+            String repoOwner = scanner.nextLine().trim();
+            
+            System.out.print("仓库名称: ");
+            String repoName = scanner.nextLine().trim();
+            
+            System.out.print("Issue编号: ");
+            String issueNumber = scanner.nextLine().trim();
+            
+            System.out.print("Issue标题: ");
+            String issueTitle = scanner.nextLine().trim();
+            
+            System.out.print("Issue描述: ");
+            String issueDescription = scanner.nextLine().trim();
+            
+            // 创建任务
+            SWEBenchTask task = new SWEBenchTask(taskId, repoOwner, repoName);
+            task.setIssueNumber(issueNumber);
+            task.setIssueTitle(issueTitle);
+            task.setIssueDescription(issueDescription);
+            
+            // 添加到管理器
+            SWEBenchManager manager = SWEBenchManager.getInstance();
+            manager.addTask(task);
+            
+            System.out.println("任务已添加: " + taskId);
+            
+        } catch (Exception e) {
+            System.err.println("添加任务时出错: " + e.getMessage());
+        }
+    }
+    
+    /**
+     * 打印帮助信息
+     */
+    private void printHelp() {
+        System.out.println("用法: swebench-client [命令] [参数]");
+        System.out.println();
+        System.out.println("命令:");
+        System.out.println("  start <model>  开始评测指定模型");
+        System.out.println("  stop           停止当前评测");
+        System.out.println("  status         查看评测状态");
+        System.out.println("  list           列出支持的模型");
+        System.out.println("  config         显示当前配置");
+        System.out.println("  help           显示此帮助信息");
+        System.out.println("  version        显示版本信息");
+        System.out.println();
+        System.out.println("如果不提供命令，将进入交互模式");
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/evaluator/DockerEnvironment.java b/src/main/java/com/taobao/profile/swebench/evaluator/DockerEnvironment.java
new file mode 100644
index 0000000..07cb3a9
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/evaluator/DockerEnvironment.java
@@ -0,0 +1,213 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.evaluator;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+
+import com.taobao.profile.swebench.SWEBenchConfig;
+import com.taobao.profile.swebench.task.SWEBenchTask;
+
+/**
+ * Docker环境管理
+ * 负责创建和管理任务执行的Docker容器
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class DockerEnvironment {
+    
+    private SWEBenchConfig config;
+    private static final String CONTAINER_PREFIX = "swebench-";
+    
+    public DockerEnvironment(SWEBenchConfig config) {
+        this.config = config;
+    }
+    
+    /**
+     * 准备Docker容器
+     */
+    public void prepareContainer(SWEBenchTask task, String repoPath) throws IOException {
+        String containerName = getContainerName(task);
+        
+        // 检查容器是否已存在
+        if (containerExists(containerName)) {
+            // 停止并删除旧容器
+            stopContainer(containerName);
+            removeContainer(containerName);
+        }
+        
+        // 创建新容器
+        createContainer(task, containerName, repoPath);
+    }
+    
+    /**
+     * 创建容器
+     */
+    private void createContainer(SWEBenchTask task, String containerName, String repoPath) throws IOException {
+        List<String> command = new ArrayList<>();
+        command.add("docker");
+        command.add("run");
+        command.add("-d");
+        command.add("--name");
+        command.add(containerName);
+        command.add("-v");
+        command.add(repoPath + ":/workspace");
+        command.add("-w");
+        command.add("/workspace");
+        
+        // 设置资源限制
+        command.add("--memory=4g");
+        command.add("--cpus=2");
+        
+        // 使用配置的镜像
+        command.add(config.getDockerImage());
+        command.add("sleep");
+        command.add("infinity");
+        
+        executeDockerCommand(command);
+    }
+    
+    /**
+     * 在容器中执行命令
+     */
+    public String executeInContainer(String containerName, String cmd) throws IOException {
+        List<String> command = new ArrayList<>();
+        command.add("docker");
+        command.add("exec");
+        command.add(containerName);
+        command.add("bash");
+        command.add("-c");
+        command.add(cmd);
+        
+        return executeDockerCommand(command);
+    }
+    
+    /**
+     * 复制文件到容器
+     */
+    public void copyToContainer(String containerName, String sourcePath, String destPath) throws IOException {
+        List<String> command = new ArrayList<>();
+        command.add("docker");
+        command.add("cp");
+        command.add(sourcePath);
+        command.add(containerName + ":" + destPath);
+        
+        executeDockerCommand(command);
+    }
+    
+    /**
+     * 从容器复制文件
+     */
+    public void copyFromContainer(String containerName, String sourcePath, String destPath) throws IOException {
+        List<String> command = new ArrayList<>();
+        command.add("docker");
+        command.add("cp");
+        command.add(containerName + ":" + sourcePath);
+        command.add(destPath);
+        
+        executeDockerCommand(command);
+    }
+    
+    /**
+     * Stops and removes the container that belongs to the given task.
+     * <p>
+     * Best effort: failures are ignored and never propagated. The two steps
+     * are attempted independently so a failing {@code docker stop} does not
+     * prevent the {@code docker rm} attempt.
+     *
+     * @param task task whose container should be removed
+     */
+    public void cleanupContainer(SWEBenchTask task) {
+        String containerName = getContainerName(task);
+        try {
+            stopContainer(containerName);
+        } catch (Exception ignored) {
+            // The container may not be running or may not exist; still try to remove it
+        }
+        try {
+            removeContainer(containerName);
+        } catch (Exception ignored) {
+            // Cleanup errors are deliberately ignored
+        }
+    }
+    
+    /**
+     * Checks whether a container with exactly the given name exists, running
+     * or stopped.
+     * <p>
+     * The output of {@code docker ps -a --format {{.Names}}} is compared line
+     * by line; a substring test would report {@code swebench-a-1} as existing
+     * whenever {@code swebench-a-10} does.
+     *
+     * @param containerName name to look for
+     * @return {@code true} if a container with that name is listed;
+     *         {@code false} otherwise or when the docker command fails
+     */
+    private boolean containerExists(String containerName) {
+        try {
+            List<String> command = new ArrayList<>();
+            command.add("docker");
+            command.add("ps");
+            command.add("-a");
+            command.add("--format");
+            command.add("{{.Names}}");
+
+            String output = executeDockerCommand(command);
+            for (String listedName : output.split("\n")) {
+                if (listedName.trim().equals(containerName)) {
+                    return true;
+                }
+            }
+            return false;
+        } catch (Exception e) {
+            // Treated as "not found"; creating the container will surface real docker problems
+            return false;
+        }
+    }
+    
+    /**
+     * 停止容器
+     */
+    private void stopContainer(String containerName) throws IOException {
+        List<String> command = new ArrayList<>();
+        command.add("docker");
+        command.add("stop");
+        command.add(containerName);
+        
+        executeDockerCommand(command);
+    }
+    
+    /**
+     * 删除容器
+     */
+    private void removeContainer(String containerName) throws IOException {
+        List<String> command = new ArrayList<>();
+        command.add("docker");
+        command.add("rm");
+        command.add(containerName);
+        
+        executeDockerCommand(command);
+    }
+    
+    /**
+     * Runs a docker command and returns its combined stdout/stderr output.
+     * <p>
+     * The child process is always destroyed on the way out, so a read failure
+     * or an interrupt does not leave it behind; destroying a process that has
+     * already exited has no effect.
+     *
+     * @param command full command line, one argument per element
+     * @return everything the command printed, each line terminated by {@code \n}
+     * @throws IOException if the process cannot be started, exits with a
+     *         non-zero status, or the waiting thread is interrupted (the
+     *         interrupt flag is restored and the cause is attached)
+     */
+    private String executeDockerCommand(List<String> command) throws IOException {
+        ProcessBuilder pb = new ProcessBuilder(command);
+        pb.redirectErrorStream(true);
+
+        Process process = pb.start();
+        StringBuilder output = new StringBuilder();
+
+        try {
+            try (BufferedReader reader = new BufferedReader(
+                    new InputStreamReader(process.getInputStream()))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    output.append(line).append("\n");
+                }
+            }
+
+            int exitCode = process.waitFor();
+            if (exitCode != 0) {
+                throw new IOException("Docker命令执行失败: " + String.join(" ", command) +
+                                    "\n输出: " + output.toString());
+            }
+            return output.toString();
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            // Keep the original exception as the cause for diagnostics
+            throw new IOException("Docker命令被中断", e);
+        } finally {
+            process.destroy();
+        }
+    }
+    
+    /**
+     * 获取容器名称
+     */
+    public String getContainerName(SWEBenchTask task) {
+        return CONTAINER_PREFIX + task.getTaskId().toLowerCase().replaceAll("[^a-z0-9-]", "-");
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/evaluator/ModelEvaluator.java b/src/main/java/com/taobao/profile/swebench/evaluator/ModelEvaluator.java
new file mode 100644
index 0000000..689ffe4
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/evaluator/ModelEvaluator.java
@@ -0,0 +1,228 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.evaluator;
+
+import java.io.*;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadMXBean;
+import java.util.ArrayList;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+import com.taobao.profile.Manager;
+import com.taobao.profile.Profiler;
+import com.taobao.profile.swebench.SWEBenchConfig;
+import com.taobao.profile.swebench.task.SWEBenchTask;
+import com.taobao.profile.swebench.task.TaskResult;
+
+/**
+ * Model evaluator.
+ * Invokes the AI model to solve a task and evaluates the result.
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class ModelEvaluator {
+    
+    private SWEBenchConfig config;
+    private DockerEnvironment dockerEnv;
+    private ModelInterface modelInterface;
+    
+    public ModelEvaluator(SWEBenchConfig config) { // the Docker and model helpers are built from the same config
+        this.config = config;
+        this.dockerEnv = new DockerEnvironment(config);
+        this.modelInterface = new ModelInterface(config);
+    }
+    
+    /**
+     * Evaluates one task end to end: prepares the environment, asks the model for a patch,
+     * applies it and runs the tests, then records metrics. Task failures are not thrown;
+     * they are reported through the returned result's success flag and error message.
+     */
+    public TaskResult evaluateTask(SWEBenchTask task, String modelName) {
+        TaskResult result = new TaskResult(task.getTaskId(), modelName);
+        result.getPerformanceMetrics().setStartTime(System.currentTimeMillis());
+
+        // Start profiling if enabled. A separate flag (rather than a -1 sentinel id) is used
+        // because String.hashCode() may legitimately return -1.
+        int profileMethodId = task.getTaskId().hashCode();
+        boolean profilingStarted = false;
+        if (config.isEnableProfiling() && Manager.instance().canProfile()) {
+            Profiler.Start(profileMethodId);
+            profilingStarted = true;
+        }
+
+        try {
+            // 1. Prepare the execution environment
+            if (Manager.instance().isDebugMode()) {
+                System.out.println("准备任务环境: " + task.getTaskId());
+            }
+            prepareEnvironment(task);
+
+            // 2. Ask the model for a solution, measuring this thread's CPU time around the call
+            long startCpuTime = getCpuTime();
+            String generatedPatch = modelInterface.generateSolution(task, modelName);
+            long cpuTime = getCpuTime() - startCpuTime;
+
+            result.setGeneratedPatch(generatedPatch);
+            result.getPerformanceMetrics().setCpuTimeMillis(cpuTime);
+            result.getPerformanceMetrics().setApiCallCount(modelInterface.getLastApiCallCount());
+            result.getPerformanceMetrics().setTokenCount(modelInterface.getLastTokenCount());
+
+            // 3. Apply the patch and run the tests
+            TestExecutor testExecutor = new TestExecutor(dockerEnv);
+            TaskResult.TestResult testResult = testExecutor.runTests(task, generatedPatch);
+            result.setTestResult(testResult);
+
+            // 4. Success means at least one test ran and none failed
+            result.setSuccess(testResult.getFailedTests() == 0 && testResult.getTotalTests() > 0);
+
+            // 5. Collect performance data
+            collectPerformanceData(result);
+        } catch (Exception e) {
+            // NOTE(review): nothing visible in this try block declares the checked TimeoutException,
+            // so a dedicated catch clause for it is rejected by javac; command timeouts arrive as IOException.
+            result.setSuccess(false);
+            result.setErrorMessage("任务执行失败: " + e.getMessage());
+            e.printStackTrace();
+        } finally {
+            // Record the end time
+            result.getPerformanceMetrics().recordEnd();
+
+            // Close the profiling frame only if one was opened above
+            if (profilingStarted && Manager.instance().canProfile()) {
+                Profiler.End(profileMethodId);
+            }
+
+            // Clean up the environment
+            cleanupEnvironment(task);
+        }
+        return result;
+    }
+    
+    /**
+     * Creates the task's work directory, clones/checks out the repository into it and
+     * prepares the Docker container. Throws IOException if the directory cannot be created.
+     */
+    private void prepareEnvironment(SWEBenchTask task) throws IOException {
+        File workDir = new File(config.getTaskDataPath(), task.getTaskId());
+        // mkdirs() reports failure only through its return value; surface it here, not later in git
+        if (!workDir.mkdirs() && !workDir.isDirectory()) {
+            throw new IOException("无法创建工作目录: " + workDir.getAbsolutePath());
+        }
+
+        // Clone or update the repository
+        String repoPath = cloneRepository(task, workDir);
+        // Prepare the Docker container
+        dockerEnv.prepareContainer(task, repoPath);
+    }
+    
+    /**
+     * Clones the task repository into {@code workDir} unless it is already present, checks
+     * out the task branch when one is set, and returns the repository's absolute path.
+     * Arguments are passed as separate argv entries, so paths containing spaces survive.
+     */
+    private String cloneRepository(SWEBenchTask task, File workDir) throws IOException {
+        File repoDir = new File(workDir, task.getRepoName());
+        if (!repoDir.exists()) {
+            // "--" ends option parsing so the URL cannot be read as a git option
+            executeCommand(workDir, "git", "clone", "--", task.getRepoUrl(), repoDir.getAbsolutePath());
+        }
+        // Switch to the requested branch
+        if (task.getRepoBranch() != null) {
+            executeCommand(repoDir, "git", "checkout", task.getRepoBranch());
+        }
+        return repoDir.getAbsolutePath();
+    }
+
+    /**
+     * Runs {@code command} in {@code workDir}, waiting at most five minutes. Throws
+     * IOException on timeout, non-zero exit status, or interruption of the calling thread.
+     */
+    private void executeCommand(File workDir, String... command) throws IOException {
+        String commandLine = String.join(" ", command);
+        // Send the child's output to this JVM's stdout: an unread pipe could fill up and
+        // block the child until the timeout expires.
+        Process process = new ProcessBuilder(command).directory(workDir)
+            .redirectErrorStream(true).redirectOutput(ProcessBuilder.Redirect.INHERIT).start();
+        try {
+            if (!process.waitFor(5, TimeUnit.MINUTES)) {
+                process.destroyForcibly();
+                throw new IOException("命令执行超时: " + commandLine);
+            }
+            if (process.exitValue() != 0) {
+                throw new IOException("命令执行失败: " + commandLine);
+            }
+        } catch (InterruptedException e) {
+            process.destroyForcibly(); // do not leave git running after the caller gave up
+            Thread.currentThread().interrupt();
+            throw new IOException("命令执行被中断: " + commandLine, e);
+        }
+    }
+    
+    /**
+     * 收集性能数据
+     */
+    private void collectPerformanceData(TaskResult result) {
+        // 收集内存使用
+        Runtime runtime = Runtime.getRuntime();
+        long memoryUsed = runtime.totalMemory() - runtime.freeMemory();
+        result.getPerformanceMetrics().setMemoryUsedBytes(memoryUsed);
+        
+        // 估算成本（基于token数量）
+        double costPerToken = 0.00002; // 示例成本
+        double cost = result.getPerformanceMetrics().getTokenCount() * costPerToken;
+        result.getPerformanceMetrics().setCostEstimate(cost);
+    }
+    
+    /** Best-effort cleanup of the task's container and, unless intermediate results are kept, its work directory. */
+    private void cleanupEnvironment(SWEBenchTask task) {
+        try {
+            dockerEnv.cleanupContainer(task);
+        } catch (Exception e) {
+            // Report and continue: a failed container cleanup must not skip the directory cleanup
+            e.printStackTrace();
+        }
+        try {
+            if (!config.isSaveIntermediateResults()) {
+                deleteDirectory(new File(config.getTaskDataPath(), task.getTaskId()));
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+    
+    /**
+     * Recursively deletes {@code dir}. Symbolic links are deleted as links and never followed, so a link inside a cloned repository cannot cause deletions outside the work directory.
+     */
+    private void deleteDirectory(File dir) {
+        if (dir.exists()) {
+            File[] files = dir.listFiles();
+            if (files != null) {
+                for (File file : files) {
+                    if (file.isDirectory() && !java.nio.file.Files.isSymbolicLink(file.toPath())) {
+                        deleteDirectory(file);
+                    } else {
+                        file.delete();
+                    }
+                }
+            }
+            dir.delete();
+        }
+    }
+    
+    /**
+     * Returns the current thread's CPU time in milliseconds, or 0 when the JVM does not support measuring it.
+     */
+    private long getCpuTime() {
+        ThreadMXBean threads = ManagementFactory.getThreadMXBean();
+        if (!threads.isCurrentThreadCpuTimeSupported()) { return 0L; }
+        return threads.getCurrentThreadCpuTime() / 1000000L;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/evaluator/ModelInterface.java b/src/main/java/com/taobao/profile/swebench/evaluator/ModelInterface.java
new file mode 100644
index 0000000..70cd892
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/evaluator/ModelInterface.java
@@ -0,0 +1,267 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.evaluator;
+
+import java.io.*;
+import java.net.*;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.taobao.profile.swebench.SWEBenchConfig;
+import com.taobao.profile.swebench.task.SWEBenchTask;
+
+/**
+ * Model interface.
+ * Talks to the AI model and produces candidate solutions.
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class ModelInterface {
+    
+    private SWEBenchConfig config;
+    private int lastApiCallCount = 0;
+    private int lastTokenCount = 0;
+    
+    public ModelInterface(SWEBenchConfig config) { // config supplies the API URL, API key and max-token limit used below
+        this.config = config;
+    }
+    
+    /**
+     * Asks the model to solve {@code task} and returns the patch extracted from its reply.
+     * The per-call API and token counters are reset first, so the getters describe this call.
+     */
+    public String generateSolution(SWEBenchTask task, String modelName) throws IOException {
+        // Start both counters from zero for this request
+        lastTokenCount = 0;
+        lastApiCallCount = 0;
+
+        // Build the prompt, send it to the model, then keep only the patch part of the reply
+        final String taskPrompt = buildPrompt(task);
+        final String modelReply = callModel(modelName, taskPrompt);
+        return extractPatch(modelReply);
+    }
+    
+    /**
+     * Assembles the model prompt: a role preamble, the task's own prompt text,
+     * numbered instructions, and a closing request for a patch.
+     */
+    private String buildPrompt(SWEBenchTask task) {
+        // Role / system preamble
+        String preamble = "You are an expert software engineer. "
+            + "Your task is to solve the following GitHub issue by generating a patch.\n\n";
+
+        // Step-by-step guidance
+        String guidance = "\nInstructions:\n"
+            + "1. Analyze the issue carefully\n"
+            + "2. Identify the root cause\n"
+            + "3. Generate a minimal patch that fixes the issue\n"
+            + "4. Make sure the patch follows the project's coding style\n"
+            + "5. The patch should be in unified diff format\n\n";
+
+        String closing = "Please provide your solution as a patch:\n";
+        // The task's own prompt text sits between the preamble and the guidance
+        return new StringBuilder(preamble)
+            .append(task.generateTaskPrompt())
+            .append(guidance)
+            .append(closing)
+            .toString();
+    }
+    
+    /**
+     * Counts one API call and routes the prompt: to the mock generator when no API URL
+     * is configured (null or empty), otherwise to the real HTTP endpoint.
+     */
+    private String callModel(String modelName, String prompt) throws IOException {
+        lastApiCallCount++;
+        // Simplified dispatch; a full implementation would select an API per model
+        boolean apiConfigured = config.getModelApiUrl() != null && !config.getModelApiUrl().isEmpty();
+        if (apiConfigured) {
+            // Call the real API
+            return callRealAPI(modelName, prompt);
+        }
+        // No API configured: fall back to a simulated response
+        return generateMockResponse(modelName, prompt);
+    }
+    
+    /**
+     * POSTs the prompt as JSON to the configured endpoint (bearer-token auth) and returns the
+     * reply's "content" field; the token counter is taken from "usage.total_tokens" if present.
+     * Throws IOException on I/O failure, on timeout, or when the reply has no string content.
+     */
+    private String callRealAPI(String modelName, String prompt) throws IOException {
+        URL url = new URL(config.getModelApiUrl());
+        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+        try {
+            conn.setRequestMethod("POST");
+            conn.setRequestProperty("Content-Type", "application/json");
+            conn.setRequestProperty("Authorization", "Bearer " + config.getModelApiKey());
+            conn.setDoOutput(true);
+            // Without timeouts a stalled endpoint would block the evaluating thread indefinitely
+            conn.setConnectTimeout(30000);
+            conn.setReadTimeout(300000);
+            Map<String, Object> requestBody = new HashMap<>();
+            requestBody.put("model", modelName);
+            requestBody.put("prompt", prompt);
+            requestBody.put("max_tokens", config.getModelMaxTokens());
+            // Send and receive as UTF-8 rather than the platform default charset
+            try (OutputStreamWriter writer = new OutputStreamWriter(
+                    conn.getOutputStream(), java.nio.charset.StandardCharsets.UTF_8)) {
+                writer.write(toJson(requestBody));
+            }
+            StringBuilder response = new StringBuilder();
+            try (BufferedReader reader = new BufferedReader(new InputStreamReader(
+                    conn.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
+                String line;
+                while ((line = reader.readLine()) != null) {
+                    response.append(line).append("\n");
+                }
+            }
+            // Parse the reply; tolerate a missing or malformed "usage" object
+            Map<String, Object> responseData = parseJson(response.toString());
+            Object usage = responseData.get("usage");
+            Object total = usage instanceof Map ? ((Map<?, ?>) usage).get("total_tokens") : null;
+            if (total instanceof Number) { lastTokenCount = ((Number) total).intValue(); }
+            Object content = responseData.get("content");
+            if (!(content instanceof String)) { // a null return would surface as an NPE in extractPatch
+                throw new IOException("模型响应缺少content字段");
+            }
+            return (String) content;
+        } finally {
+            conn.disconnect();
+        }
+    }
+    
+    /**
+     * Produces a canned reply containing a fixed sample diff (used when no API is configured)
+     * and sets the token counter to a rough estimate of one token per four prompt characters.
+     */
+    private String generateMockResponse(String modelName, String prompt) {
+        // Rough token estimate
+        lastTokenCount = prompt.length() / 4;
+
+        // Fixed reply: one sentence of prose followed by a fenced sample diff
+        return "Based on the issue description, here is the patch:\n\n"
+            + "```diff\n"
+            + "--- a/example.py\n"
+            + "+++ b/example.py\n"
+            + "@@ -10,7 +10,7 @@\n"
+            + " def example_function():\n"
+            + "-    return \"old value\"\n"
+            + "+    return \"new value\"\n"
+            + " \n"
+            + "```\n";
+    }
+    
+    /**
+     * Extracts the patch from a model reply. Preference order: the body of the first
+     * ```diff (else ```patch) fenced block; otherwise the bare unified diff found by
+     * extractDiffFormat; otherwise the whole reply. A null reply yields an empty patch.
+     */
+    private String extractPatch(String response) {
+        if (response == null) { return ""; }
+        // Locate a diff/patch fenced code block
+        int fenceIndex = response.indexOf("```diff");
+        if (fenceIndex == -1) {
+            fenceIndex = response.indexOf("```patch");
+        }
+        // The body starts after the fence line; a fence with no following newline has no body and is skipped
+        int lineEnd = fenceIndex == -1 ? -1 : response.indexOf('\n', fenceIndex);
+        if (lineEnd != -1) {
+            int endIndex = response.indexOf("```", lineEnd + 1);
+            if (endIndex != -1) {
+                return response.substring(lineEnd + 1, endIndex).trim();
+            }
+        }
+        // No usable fenced block: look for bare diff headers
+        if (response.contains("--- ") && response.contains("+++ ")) {
+            return extractDiffFormat(response);
+        }
+        return response;
+    }
+    
+    /**
+     * Extracts the first bare unified diff from free text. Capture starts at the first file
+     * header line and stops at the first non-blank line that cannot belong to a diff, so
+     * prose before or after the patch is dropped and multi-file git patches stay whole.
+     */
+    private String extractDiffFormat(String response) {
+        StringBuilder patch = new StringBuilder();
+        boolean inDiff = false;
+        for (String line : response.split("\n")) {
+            // Header lines may open a diff; body lines only count once a header has been seen
+            boolean header = line.startsWith("diff --git ")
+                || line.startsWith("--- ") || line.startsWith("+++ ");
+            boolean body = line.matches(
+                "(?s)(@@ |[-+ \\\\]|index |(new|deleted) file mode |(old|new) mode |similarity index |rename (from|to) ).*");
+            if (header || (inDiff && body)) {
+                inDiff = true;
+                patch.append(line).append("\n");
+            } else if (inDiff && !line.trim().isEmpty()) {
+                break; // first non-diff line after the patch ends it
+            }
+        }
+        return patch.toString().trim();
+    }
+    
+    /**
+     * Serializes a flat map to a JSON object string. String values are escaped and
+     * quoted; any other value is written via its string form. Keys are written unescaped.
+     */
+    private String toJson(Map<String, Object> map) {
+        // A real JSON library should replace this; it is only a simplified example
+        StringBuilder out = new StringBuilder();
+        out.append("{");
+        String separator = "";
+
+        for (Map.Entry<String, Object> field : map.entrySet()) {
+            Object fieldValue = field.getValue();
+            out.append(separator).append("\"").append(field.getKey()).append("\":");
+            if (!(fieldValue instanceof String)) {
+                out.append(fieldValue);
+            } else {
+                out.append("\"").append(escapeJson((String) fieldValue)).append("\"");
+            }
+            separator = ",";
+        }
+
+        return out.append("}").toString();
+    }
+    
+    /** Escapes {@code value} for use inside a JSON string literal; control characters below U+0020 without a short escape become \\uXXXX. */
+    private String escapeJson(String value) {
+        String escaped = value.replace("\\", "\\\\").replace("\"", "\\\"")
+                              .replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t");
+        StringBuilder out = new StringBuilder(escaped.length());
+        for (char c : escaped.toCharArray()) {
+            if (c < 0x20) { out.append(String.format("\\u%04x", (int) c)); } else { out.append(c); }
+        }
+        return out.toString();
+    }
+    
+    /**
+     * Placeholder JSON parser: currently ignores its input and always returns an empty map.
+     */
+    private Map<String, Object> parseJson(String json) {
+        // A real JSON library should be used here; this is only a simplified example
+        Map<String, Object> result = new HashMap<>();
+        // TODO: implement JSON parsing
+        return result;
+    }
+    
+    // Getters
+    
+    public int getLastApiCallCount() { // model calls counted since generateSolution() last reset the counter
+        return lastApiCallCount;
+    }
+    
+    public int getLastTokenCount() { // token count recorded for the most recent model call (estimated for mock replies)
+        return lastTokenCount;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/evaluator/TestExecutor.java b/src/main/java/com/taobao/profile/swebench/evaluator/TestExecutor.java
new file mode 100644
index 0000000..62428e1
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/evaluator/TestExecutor.java
@@ -0,0 +1,224 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.evaluator;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.taobao.profile.swebench.task.SWEBenchTask;
+import com.taobao.profile.swebench.task.TaskResult;
+
+/**
+ * Test executor.
+ * Applies a patch inside the task container and runs the tests.
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class TestExecutor {
+    
+    private DockerEnvironment dockerEnv;
+    
+    public TestExecutor(DockerEnvironment dockerEnv) { // every command and file copy goes through this environment
+        this.dockerEnv = dockerEnv;
+    }
+    
+    /**
+     * 运行测试
+     */
+    public TaskResult.TestResult runTests(SWEBenchTask task, String patch) throws IOException {
+        TaskResult.TestResult result = new TaskResult.TestResult();
+        String containerName = dockerEnv.getContainerName(task);
+        
+        try {
+            // 1. 应用补丁
+            applyPatch(containerName, patch);
+            
+            // 2. 运行测试命令
+            List<String> testOutputs = new ArrayList<>();
+            
+            if (task.getTestCommands() != null && !task.getTestCommands().isEmpty()) {
+                for (String testCommand : task.getTestCommands()) {
+                    String output = dockerEnv.executeInContainer(containerName, testCommand);
+                    testOutputs.add(output);
+                }
+            } else {
+                // 使用默认测试命令
+                String output = runDefaultTests(containerName, task);
+                testOutputs.add(output);
+            }
+            
+            // 3. 解析测试结果
+            parseTestResults(testOutputs, result);
+            
+            // 4. 检查失败的测试
+            if (task.getFailingTests() != null) {
+                checkFailingTests(containerName, task.getFailingTests(), result);
+            }
+            
+        } catch (Exception e) {
+            result.setTestOutput("测试执行失败: " + e.getMessage());
+            result.setTotalTests(1);
+            result.setFailedTests(1);
+        }
+        
+        return result;
+    }
+    
+    /**
+     * 应用补丁
+     */
+    private void applyPatch(String containerName, String patch) throws IOException {
+        // 将补丁保存到临时文件
+        File patchFile = File.createTempFile("patch", ".diff");
+        try (FileWriter writer = new FileWriter(patchFile)) {
+            writer.write(patch);
+        }
+        
+        // 复制补丁到容器
+        dockerEnv.copyToContainer(containerName, patchFile.getAbsolutePath(), "/tmp/patch.diff");
+        
+        // 应用补丁
+        String applyCommand = "cd /workspace && git apply /tmp/patch.diff";
+        String output = dockerEnv.executeInContainer(containerName, applyCommand);
+        
+        // 清理临时文件
+        patchFile.delete();
+        
+        // 检查补丁是否应用成功
+        if (output.contains("error") || output.contains("failed")) {
+            throw new IOException("补丁应用失败: " + output);
+        }
+    }
+    
+    /**
+     * 运行默认测试
+     */
+    private String runDefaultTests(String containerName, SWEBenchTask task) throws IOException {
+        // 尝试常见的测试命令
+        String[] testCommands = {
+            "python -m pytest",
+            "python -m unittest discover",
+            "npm test",
+            "mvn test",
+            "gradle test",
+            "make test"
+        };
+        
+        for (String command : testCommands) {
+            try {
+                String output = dockerEnv.executeInContainer(containerName, 
+                    "cd /workspace && " + command + " 2>&1 || true");
+                if (!output.contains("command not found")) {
+                    return output;
+                }
+            } catch (Exception e) {
+                // 忽略错误，尝试下一个命令
+            }
+        }
+        
+        return "No test command found";
+    }
+    
+    /**
+     * Parses pytest-style and unittest-style summaries from the collected outputs and stores
+     * the totals, the failing-test lines and the concatenated raw output on {@code result}.
+     * Counts accumulate across outputs, errors count as failures, and the total is always
+     * passed + failed.
+     */
+    private void parseTestResults(List<String> outputs, TaskResult.TestResult result) {
+        // pytest summary, e.g. "1 failed, 2 passed, 1 error in 0.12s" (each part optional, any order)
+        Pattern pytestPassed = Pattern.compile("(\\d+) passed");
+        Pattern pytestFailed = Pattern.compile("(\\d+) failed");
+        Pattern pytestErrors = Pattern.compile("(\\d+) errors?\\b");
+        // unittest summary, e.g. "Ran 3 tests in 0.001s" then "OK" or "FAILED (failures=1, errors=1)"
+        Pattern unittestRan = Pattern.compile("Ran (\\d+) tests?");
+        Pattern unittestFailures = Pattern.compile("failures=(\\d+)");
+        Pattern unittestErrors = Pattern.compile("errors=(\\d+)");
+
+        int passedTests = 0;
+        int failedTests = 0;
+        List<String> failedTestNames = new ArrayList<>();
+        StringBuilder fullOutput = new StringBuilder();
+
+        for (String output : outputs) {
+            fullOutput.append(output).append("\n");
+
+            // pytest: look each counter up on its own, since an all-pass run prints no "failed" part
+            passedTests += firstCount(pytestPassed, output);
+            failedTests += firstCount(pytestFailed, output) + firstCount(pytestErrors, output);
+
+            // unittest: "Ran N test(s)" gives the total; failures and errors both count as failed
+            int ran = firstCount(unittestRan, output);
+            if (ran > 0) {
+                // Cap at "ran" so malformed output can never yield a negative pass count
+                int bad = Math.min(ran,
+                    firstCount(unittestFailures, output) + firstCount(unittestErrors, output));
+                failedTests += bad;
+                passedTests += ran - bad;
+            }
+
+            // Collect lines naming failed tests, skipping unittest's "FAILED (...)" summary line
+            for (String line : output.split("\n")) {
+                String trimmed = line.trim();
+                if ((trimmed.contains("FAILED") || trimmed.contains("FAIL:"))
+                        && !trimmed.startsWith("FAILED (")) {
+                    failedTestNames.add(trimmed);
+                }
+            }
+        }
+
+        // The total is derived so that the three counters can never disagree
+        result.setTotalTests(passedTests + failedTests);
+        result.setPassedTests(passedTests);
+        result.setFailedTests(failedTests);
+        result.setFailedTestNames(failedTestNames);
+        result.setTestOutput(fullOutput.toString());
+    }
+
+    /**
+     * Returns capture group 1 of the pattern's first match in {@code text} as an int, or 0 when there is no match.
+     */
+    private static int firstCount(Pattern pattern, String text) {
+        Matcher matcher = pattern.matcher(text);
+        return matcher.find() ? Integer.parseInt(matcher.group(1)) : 0;
+    }
+    
+    /**
+     * 检查特定的失败测试
+     */
+    private void checkFailingTests(String containerName, List<String> failingTests, 
+                                  TaskResult.TestResult result) throws IOException {
+        List<String> stillFailing = new ArrayList<>();
+        
+        for (String testName : failingTests) {
+            // 运行单个测试
+            String command = String.format("cd /workspace && python -m pytest %s -v 2>&1 || true", testName);
+            String output = dockerEnv.executeInContainer(containerName, command);
+            
+            if (output.contains("FAILED") || output.contains("ERROR")) {
+                stillFailing.add(testName);
+            }
+        }
+        
+        // 更新失败的测试列表
+        if (!stillFailing.isEmpty()) {
+            result.setFailedTestNames(stillFailing);
+            result.setFailedTests(stillFailing.size());
+            
+            // 调整通过的测试数
+            if (result.getTotalTests() > 0) {
+                result.setPassedTests(result.getTotalTests() - stillFailing.size());
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/loader/TaskLoader.java b/src/main/java/com/taobao/profile/swebench/loader/TaskLoader.java
new file mode 100644
index 0000000..4053b6f
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/loader/TaskLoader.java
@@ -0,0 +1,163 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.loader;
+
+import java.io.*;
+import java.util.*;
+
+import com.taobao.profile.swebench.task.SWEBenchTask;
+
+/**
+ * Task loader.
+ * Loads SWE-bench tasks from various data sources.
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class TaskLoader {
+    
+    /**
+     * Loads the sample tasks.
+     * Returns three hard-coded examples of typical SWE-bench tasks.
+     */
+    public static List<SWEBenchTask> loadSampleTasks() {
+        List<SWEBenchTask> tasks = new ArrayList<>();
+        
+        // Sample task 1: a simple bug fix
+        SWEBenchTask task1 = new SWEBenchTask("sample-001", "example", "calculator");
+        task1.setIssueNumber("123");
+        task1.setIssueTitle("Division by zero error in calculate method");
+        task1.setIssueDescription(
+            "When calling calculate(10, 0, '/'), the method throws an unhandled exception.\n" +
+            "Expected behavior: Should return an error message instead of throwing exception."
+        );
+        task1.setRepoBranch("main");
+        task1.setDifficultyLevel(2);
+        task1.setExpectedTimeMinutes(15);
+        
+        List<String> failingTests1 = new ArrayList<>();
+        failingTests1.add("test_division_by_zero");
+        task1.setFailingTests(failingTests1);
+        
+        tasks.add(task1);
+        
+        // Sample task 2: a feature enhancement
+        SWEBenchTask task2 = new SWEBenchTask("sample-002", "example", "string-utils");
+        task2.setIssueNumber("456");
+        task2.setIssueTitle("Add support for case-insensitive string comparison");
+        task2.setIssueDescription(
+            "The current compare() method is case-sensitive only.\n" +
+            "Please add an optional parameter to enable case-insensitive comparison."
+        );
+        task2.setRepoBranch("develop");
+        task2.setDifficultyLevel(3);
+        task2.setExpectedTimeMinutes(30);
+        task2.setTaskType(SWEBenchTask.TaskType.FEATURE);
+        
+        tasks.add(task2);
+        
+        // Sample task 3: a performance optimization (typed as REFACTOR)
+        SWEBenchTask task3 = new SWEBenchTask("sample-003", "example", "data-processor");
+        task3.setIssueNumber("789");
+        task3.setIssueTitle("Optimize large file processing performance");
+        task3.setIssueDescription(
+            "Processing files larger than 100MB takes too long.\n" +
+            "Current implementation loads entire file into memory.\n" +
+            "Please implement streaming processing to improve performance."
+        );
+        task3.setRepoBranch("performance");
+        task3.setDifficultyLevel(4);
+        task3.setExpectedTimeMinutes(60);
+        task3.setTaskType(SWEBenchTask.TaskType.REFACTOR);
+        
+        tasks.add(task3);
+        
+        return tasks;
+    }
+    
+    /**
+     * Loads tasks from a JSON file. Not implemented yet: the file is opened and closed without being read, and an empty list is returned.
+     */
+    public static List<SWEBenchTask> loadFromJson(String filePath) throws IOException {
+        List<SWEBenchTask> tasks = new ArrayList<>();
+        
+        // Simplified JSON parsing implementation
+        // A JSON library such as Jackson or Gson should really be used
+        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
+            // TODO: implement the JSON parsing logic
+            // This is only a skeleton
+        }
+        
+        return tasks;
+    }
+    
+    /**
+     * Loads tasks from a UTF-8 CSV file with a header row and the columns taskId, repoOwner,
+     * repoName, issueNumber, issueTitle, issueDescription. Rows with fewer than six fields are
+     * skipped. A field may be wrapped in double quotes to contain commas ("" is a literal quote).
+     */
+    public static List<SWEBenchTask> loadFromCsv(String filePath) throws IOException {
+        List<SWEBenchTask> tasks = new ArrayList<>();
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
+                new FileInputStream(filePath), java.nio.charset.StandardCharsets.UTF_8))) {
+            String line = reader.readLine(); // header row, ignored
+            while (line != null && (line = reader.readLine()) != null) {
+                // Split only on commas followed by an even number of quotes, i.e. outside quoted fields
+                String[] parts = line.split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");
+                if (parts.length >= 6) {
+                    SWEBenchTask task = new SWEBenchTask(
+                        csvField(parts[0]), csvField(parts[1]), csvField(parts[2]));
+                    task.setIssueNumber(csvField(parts[3]));
+                    task.setIssueTitle(csvField(parts[4]));
+                    task.setIssueDescription(csvField(parts[5]));
+                    tasks.add(task);
+                }
+            }
+        }
+        return tasks;
+    }
+
+    /** Trims a raw CSV field and, if it is wrapped in double quotes, strips them and unescapes "". */
+    private static String csvField(String raw) {
+        String field = raw.trim();
+        if (field.length() >= 2 && field.startsWith("\"") && field.endsWith("\"")) {
+            field = field.substring(1, field.length() - 1).replace("\"\"", "\"");
+        }
+        return field;
+    }
+    
+    /**
+     * Loads tasks from the GitHub API. Not implemented yet: always returns an empty list.
+     * Note: a GitHub API token needs to be configured.
+     */
+    public static List<SWEBenchTask> loadFromGitHub(String owner, String repo, String label) {
+        List<SWEBenchTask> tasks = new ArrayList<>();
+        
+        // TODO: implement the GitHub API call
+        // 1. Fetch the issues carrying the given label
+        // 2. Convert them to SWEBenchTask objects
+        // 3. Fetch the related test information
+        
+        return tasks;
+    }
+    
+    /**
+     * Loads tasks from a Hugging Face dataset. Not implemented yet: always returns an empty list.
+     * This is where the official SWE-bench dataset comes from.
+     */
+    public static List<SWEBenchTask> loadFromHuggingFace(String datasetType) {
+        List<SWEBenchTask> tasks = new ArrayList<>();
+        
+        // TODO: implement Hugging Face dataset loading
+        // Use the datasets library or the REST API
+        // Dataset name: princeton-nlp/SWE-bench
+        
+        return tasks;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/reporter/BenchmarkReporter.java b/src/main/java/com/taobao/profile/swebench/reporter/BenchmarkReporter.java
new file mode 100644
index 0000000..fea7fa8
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/reporter/BenchmarkReporter.java
@@ -0,0 +1,379 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.reporter;
+
+import java.io.*;
+import java.text.SimpleDateFormat;
+import java.util.*;
+
+import com.taobao.profile.swebench.SWEBenchConfig;
+import com.taobao.profile.swebench.task.TaskResult;
+import com.taobao.profile.utils.DailyRollingFileWriter;
+
+/**
+ * 基准测试报告生成器
+ * 负责生成评测结果报告
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class BenchmarkReporter {
+    
+    private SWEBenchConfig config; // supplies the report directory via getReportPath()
+    private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); // timestamps printed inside the reports
+    private SimpleDateFormat fileFormat = new SimpleDateFormat("yyyyMMdd_HHmmss"); // timestamp part of the report file names
+    /** Creates a reporter that writes into the directory returned by {@code config.getReportPath()}. */
+    public BenchmarkReporter(SWEBenchConfig config) {
+        this.config = config;
+    }
+    
+    /**
+     * Writes every report flavour (text, HTML, JSON, CSV) for one evaluation run
+     * and then appends a line to the shared summary file. Any exception aborts
+     * the remaining reports and is only printed via printStackTrace().
+     * @param modelName name of the evaluated model
+     * @param results   per-task results of the run
+     * @param startTime run start, in epoch milliseconds
+     */
+    public void generateReport(String modelName, List<TaskResult> results, long startTime) {
+        try {
+            // Make sure the output directory is there before any file is opened.
+            File outputDir = new File(config.getReportPath());
+            if (!outputDir.exists()) {
+                outputDir.mkdirs();
+            }
+            generateTextReport(modelName, results, startTime);
+            generateHtmlReport(modelName, results, startTime);
+            generateJsonReport(modelName, results, startTime);
+            generateCsvReport(modelName, results, startTime);
+            generateSummaryReport(modelName, results, startTime);
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+    
+    /**
+     * Writes the plain-text report: run header, aggregate statistics, then one
+     * section per task. The file is named swebench_MODEL_TIMESTAMP.txt and is
+     * encoded as UTF-8.
+     * @throws IOException if the report file cannot be created
+     */
+    private void generateTextReport(String modelName, List<TaskResult> results, long startTime) 
+            throws IOException {
+        String fileName = String.format("swebench_%s_%s.txt", 
+            modelName.replaceAll("[^a-zA-Z0-9]", "_"), fileFormat.format(new Date()));
+        File reportFile = new File(config.getReportPath(), fileName);
+        // Encode explicitly as UTF-8: the report contains Chinese text, which the
+        // platform default charset used by FileWriter may not be able to represent.
+        try (PrintWriter writer = new PrintWriter(reportFile, "UTF-8")) {
+            writer.println("=====================================");
+            writer.println("SWE-bench 评测报告");
+            writer.println("=====================================");
+            writer.println();
+            writer.println("模型: " + modelName);
+            writer.println("开始时间: " + dateFormat.format(new Date(startTime)));
+            writer.println("结束时间: " + dateFormat.format(new Date()));
+            writer.println("总耗时: " + formatDuration(System.currentTimeMillis() - startTime));
+            writer.println();
+            // Aggregate statistics
+            generateStatistics(writer, results);
+            // Per-task details
+            writer.println("\n详细结果:");
+            writer.println("-------------------------------------");
+            for (TaskResult result : results) {
+                writer.println("\n任务ID: " + result.getTaskId());
+                writer.println("状态: " + (result.isSuccess() ? "成功" : "失败"));
+                writer.println("执行时间: " + result.getPerformanceMetrics().getExecutionTimeMillis() + "ms");
+                writer.println("测试通过率: " + String.format("%.2f%%", result.getTestResult().getPassRate()));
+                writer.println("测试结果: " + result.getTestResult().getPassedTests() + "/" + 
+                              result.getTestResult().getTotalTests());
+                // The error message is only reported for failed tasks.
+                if (!result.isSuccess() && result.getErrorMessage() != null) {
+                    writer.println("错误信息: " + result.getErrorMessage());
+                }
+                writer.println("-------------------------------------");
+            }
+        }
+        
+        System.out.println("文本报告已生成: " + reportFile.getAbsolutePath());
+    }
+    
+    /**
+     * Writes the HTML report (summary box plus one table row per task) to
+     * swebench_MODEL_TIMESTAMP.html, encoded as UTF-8 to match the page's meta tag.
+     * Model name and task IDs are HTML-escaped; an empty run shows 0.00%, not NaN%.
+     */
+    private void generateHtmlReport(String modelName, List<TaskResult> results, long startTime) 
+            throws IOException {
+        String fileName = String.format("swebench_%s_%s.html", 
+            modelName.replaceAll("[^a-zA-Z0-9]", "_"), fileFormat.format(new Date()));
+        File reportFile = new File(config.getReportPath(), fileName);
+        String safeModel = escapeHtml(modelName);
+        // FileWriter would use the platform charset and could contradict charset="UTF-8" below.
+        try (PrintWriter writer = new PrintWriter(reportFile, "UTF-8")) {
+            writer.println("<!DOCTYPE html>");
+            writer.println("<html>");
+            writer.println("<head>");
+            writer.println("<meta charset=\"UTF-8\">");
+            writer.println("<title>SWE-bench 评测报告 - " + safeModel + "</title>");
+            writer.println("<style>");
+            writer.println("body { font-family: Arial, sans-serif; margin: 20px; }");
+            writer.println("h1 { color: #333; }");
+            writer.println("table { border-collapse: collapse; width: 100%; margin-top: 20px; }");
+            writer.println("th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }");
+            writer.println("th { background-color: #f2f2f2; }");
+            writer.println(".success { color: green; }");
+            writer.println(".failure { color: red; }");
+            writer.println(".stats { background-color: #f9f9f9; padding: 15px; margin: 20px 0; }");
+            writer.println("</style>");
+            writer.println("</head>");
+            writer.println("<body>");
+            
+            writer.println("<h1>SWE-bench 评测报告</h1>");
+            writer.println("<div class=\"stats\">");
+            writer.println("<p><strong>模型:</strong> " + safeModel + "</p>");
+            writer.println("<p><strong>开始时间:</strong> " + dateFormat.format(new Date(startTime)) + "</p>");
+            writer.println("<p><strong>结束时间:</strong> " + dateFormat.format(new Date()) + "</p>");
+            writer.println("<p><strong>总耗时:</strong> " + formatDuration(System.currentTimeMillis() - startTime) + "</p>");
+            // Aggregate figures for the summary box
+            int totalTasks = results.size();
+            int successTasks = 0;
+            double totalCost = 0;
+            long totalTokens = 0;
+            for (TaskResult result : results) {
+                if (result.isSuccess()) successTasks++;
+                totalCost += result.getPerformanceMetrics().getCostEstimate();
+                totalTokens += result.getPerformanceMetrics().getTokenCount();
+            }
+            // An empty run would otherwise divide 0.0 by 0 and print "NaN%".
+            double successRate = totalTasks > 0 ? (double) successTasks / totalTasks * 100 : 0;
+            writer.println("<p><strong>总任务数:</strong> " + totalTasks + "</p>");
+            writer.println("<p><strong>成功数:</strong> " + successTasks + "</p>");
+            writer.println("<p><strong>成功率:</strong> " + String.format("%.2f%%", successRate) + "</p>");
+            writer.println("<p><strong>总成本:</strong> $" + String.format("%.4f", totalCost) + "</p>");
+            writer.println("<p><strong>总Token数:</strong> " + totalTokens + "</p>");
+            writer.println("</div>");
+            // Result table: header row first, then one row per task
+            writer.println("<h2>详细结果</h2>");
+            writer.println("<table>");
+            writer.println("<tr>");
+            for (String header : new String[] {"任务ID", "状态", "执行时间(ms)", "测试通过率", "API调用", "Token数", "成本"}) {
+                writer.println("<th>" + header + "</th>");
+            }
+            writer.println("</tr>");
+            for (TaskResult result : results) {
+                writer.println("<tr>");
+                writer.println("<td>" + escapeHtml(result.getTaskId()) + "</td>");
+                writer.println("<td class=\"" + (result.isSuccess() ? "success" : "failure") + "\">" + 
+                              (result.isSuccess() ? "成功" : "失败") + "</td>");
+                writer.println("<td>" + result.getPerformanceMetrics().getExecutionTimeMillis() + "</td>");
+                writer.println("<td>" + String.format("%.2f%%", result.getTestResult().getPassRate()) + "</td>");
+                writer.println("<td>" + result.getPerformanceMetrics().getApiCallCount() + "</td>");
+                writer.println("<td>" + result.getPerformanceMetrics().getTokenCount() + "</td>");
+                writer.println("<td>$" + String.format("%.4f", result.getPerformanceMetrics().getCostEstimate()) + "</td>");
+                writer.println("</tr>");
+            }
+            writer.println("</table>");
+            writer.println("</body>");
+            writer.println("</html>");
+        }
+        
+        System.out.println("HTML报告已生成: " + reportFile.getAbsolutePath());
+    }
+    
+    /** Escapes the characters that are significant in HTML text; null is rendered as "null". */
+    private static String escapeHtml(String text) {
+        return String.valueOf(text).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;");
+    }
+    
+    /**
+     * Writes the UTF-8 JSON report; model name and task IDs are escaped as JSON strings.
+     */
+    private void generateJsonReport(String modelName, List<TaskResult> results, long startTime) 
+            throws IOException {
+        String fileName = String.format("swebench_%s_%s.json", 
+            modelName.replaceAll("[^a-zA-Z0-9]", "_"), fileFormat.format(new Date()));
+        File reportFile = new File(config.getReportPath(), fileName);
+        try (PrintWriter writer = new PrintWriter(reportFile, "UTF-8")) {
+            writer.println("{");
+            writer.println("  \"model\": \"" + escapeJson(modelName) + "\",");
+            writer.println("  \"startTime\": \"" + dateFormat.format(new Date(startTime)) + "\",");
+            writer.println("  \"endTime\": \"" + dateFormat.format(new Date()) + "\",");
+            writer.println("  \"duration\": " + (System.currentTimeMillis() - startTime) + ",");
+            writer.println("  \"results\": [");
+            for (int i = 0; i < results.size(); i++) {
+                TaskResult result = results.get(i);
+                writer.println("    {");
+                writer.println("      \"taskId\": \"" + escapeJson(result.getTaskId()) + "\",");
+                writer.println("      \"success\": " + result.isSuccess() + ",");
+                writer.println("      \"executionTime\": " + result.getPerformanceMetrics().getExecutionTimeMillis() + ",");
+                writer.println("      \"testPassRate\": " + result.getTestResult().getPassRate() + ",");
+                writer.println("      \"apiCalls\": " + result.getPerformanceMetrics().getApiCallCount() + ",");
+                writer.println("      \"tokens\": " + result.getPerformanceMetrics().getTokenCount() + ",");
+                writer.println("      \"cost\": " + result.getPerformanceMetrics().getCostEstimate());
+                writer.println(i < results.size() - 1 ? "    }," : "    }"); // no comma after the last element
+            }
+            writer.println("  ]");
+            writer.println("}");
+        }
+        System.out.println("JSON报告已生成: " + reportFile.getAbsolutePath());
+    }
+    
+    /** Escapes backslash, double quote, newline, carriage return and tab for a JSON string; null becomes "null". */
+    private static String escapeJson(String value) {
+        return String.valueOf(value).replace("\\", "\\\\").replace("\"", "\\\"")
+            .replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t");
+    }
+    
+    /**
+     * Writes the CSV report. Numbers are formatted with Locale.ROOT so the decimal
+     * separator is always '.', and the text fields (task ID, model) are quoted
+     * when they contain a comma, a double quote or a line break.
+     */
+    private void generateCsvReport(String modelName, List<TaskResult> results, long startTime) 
+            throws IOException {
+        String fileName = String.format("swebench_%s_%s.csv", 
+            modelName.replaceAll("[^a-zA-Z0-9]", "_"), fileFormat.format(new Date()));
+        File reportFile = new File(config.getReportPath(), fileName);
+        try (PrintWriter writer = new PrintWriter(new FileWriter(reportFile))) {
+            writer.println("TaskID,Model,Success,ExecutionTime(ms),TestPassRate(%),PassedTests,TotalTests,APIcalls,Tokens,Cost($)");
+            for (TaskResult result : results) {
+                writer.printf(Locale.ROOT, "%s,%s,%s,%d,%.2f,%d,%d,%d,%d,%.4f\n",
+                    csvField(result.getTaskId()), csvField(modelName), result.isSuccess(),
+                    result.getPerformanceMetrics().getExecutionTimeMillis(),
+                    result.getTestResult().getPassRate(),
+                    result.getTestResult().getPassedTests(), result.getTestResult().getTotalTests(),
+                    result.getPerformanceMetrics().getApiCallCount(),
+                    result.getPerformanceMetrics().getTokenCount(),
+                    result.getPerformanceMetrics().getCostEstimate());
+            }
+        }
+        System.out.println("CSV报告已生成: " + reportFile.getAbsolutePath());
+    }
+    
+    /** Quotes a CSV text field (doubling embedded quotes) when it needs quoting; null becomes "null". */
+    private static String csvField(String value) {
+        String text = String.valueOf(value);
+        boolean needsQuotes = text.contains(",") || text.contains("\"") || text.contains("\n") || text.contains("\r");
+        return needsQuotes ? "\"" + text.replace("\"", "\"\"") + "\"" : text;
+    }
+    
+    /**
+     * Appends one line describing this run to swebench_summary.txt (UTF-8). The
+     * duration shown is the sum of the per-task execution times; an empty run
+     * reports a success rate of 0.00% instead of NaN%.
+     */
+    private void generateSummaryReport(String modelName, List<TaskResult> results, long startTime) 
+            throws IOException {
+        File summaryFile = new File(config.getReportPath(), "swebench_summary.txt");
+        
+        // Append mode; explicit charset because the line contains Chinese text.
+        try (PrintWriter writer = new PrintWriter(
+                new OutputStreamWriter(new FileOutputStream(summaryFile, true), "UTF-8"))) {
+            int successCount = 0;
+            double totalCost = 0;
+            long totalTime = 0;
+            
+            for (TaskResult result : results) {
+                if (result.isSuccess()) successCount++;
+                totalCost += result.getPerformanceMetrics().getCostEstimate();
+                totalTime += result.getPerformanceMetrics().getExecutionTimeMillis();
+            }
+            double successRate = results.isEmpty() ? 0 : (double) successCount / results.size() * 100;
+            
+            writer.printf("%s | %s | 任务数: %d | 成功: %d (%.2f%%) | 总耗时: %s | 总成本: $%.4f\n",
+                dateFormat.format(new Date()), modelName,
+                results.size(), successCount, successRate,
+                formatDuration(totalTime), totalCost
+            );
+        }
+    }
+    
+    /**
+     * Prints the aggregate statistics section of the text report: task counts,
+     * success rate, average timings and memory, and API usage totals. An empty
+     * result list reports a 0.00% success rate instead of NaN%.
+     */
+    private void generateStatistics(PrintWriter writer, List<TaskResult> results) {
+        int totalTasks = results.size();
+        int successTasks = 0;
+        int failedTasks = 0;
+        long totalExecutionTime = 0;
+        long totalCpuTime = 0;
+        long totalMemory = 0;
+        // Summed as long: per-task int counts could overflow an int total on large runs.
+        long totalApiCalls = 0;
+        long totalTokens = 0;
+        double totalCost = 0;
+        for (TaskResult result : results) {
+            if (result.isSuccess()) {
+                successTasks++;
+            } else {
+                failedTasks++;
+            }
+            totalExecutionTime += result.getPerformanceMetrics().getExecutionTimeMillis();
+            totalCpuTime += result.getPerformanceMetrics().getCpuTimeMillis();
+            totalMemory += result.getPerformanceMetrics().getMemoryUsedBytes();
+            totalApiCalls += result.getPerformanceMetrics().getApiCallCount();
+            totalTokens += result.getPerformanceMetrics().getTokenCount();
+            totalCost += result.getPerformanceMetrics().getCostEstimate();
+        }
+        double successRate = totalTasks > 0 ? (double) successTasks / totalTasks * 100 : 0;
+        
+        writer.println("统计信息:");
+        writer.println("-------------------------------------");
+        writer.println("总任务数: " + totalTasks);
+        writer.println("成功数: " + successTasks);
+        writer.println("失败数: " + failedTasks);
+        writer.println("成功率: " + String.format("%.2f%%", successRate));
+        writer.println();
+        writer.println("性能指标:");
+        writer.println("平均执行时间: " + (totalTasks > 0 ? totalExecutionTime/totalTasks : 0) + "ms");
+        writer.println("平均CPU时间: " + (totalTasks > 0 ? totalCpuTime/totalTasks : 0) + "ms");
+        writer.println("平均内存使用: " + formatBytes(totalTasks > 0 ? totalMemory/totalTasks : 0));
+        writer.println();
+        writer.println("API使用:");
+        writer.println("总API调用: " + totalApiCalls);
+        writer.println("总Token数: " + totalTokens);
+        writer.println("总成本: $" + String.format("%.4f", totalCost));
+        writer.println("平均成本: $" + String.format("%.4f", totalTasks > 0 ? totalCost/totalTasks : 0));
+    }
+    
+    /**
+     * Renders a millisecond duration as hours/minutes/seconds text, dropping leading zero units.
+     */
+    private String formatDuration(long millis) {
+        long totalSeconds = millis / 1000;
+        long totalMinutes = totalSeconds / 60;
+        long wholeHours = totalMinutes / 60;
+        long secondPart = totalSeconds % 60;
+        if (wholeHours > 0) {
+            return String.format("%d小时%d分钟%d秒", wholeHours, totalMinutes % 60, secondPart);
+        }
+        if (totalMinutes > 0) {
+            return String.format("%d分钟%d秒", totalMinutes, secondPart);
+        }
+        return String.format("%d秒", totalSeconds);
+    }
+    
+    /** Renders a byte count as B, KB, MB or GB (1024-based, two decimals for KB and above). */
+    private String formatBytes(long bytes) {
+        final long kb = 1024L, mb = kb * 1024, gb = mb * 1024;
+        if (bytes < kb) {
+            return bytes + " B";
+        }
+        if (bytes < mb) {
+            return String.format("%.2f KB", bytes / (double) kb);
+        }
+        if (bytes < gb) {
+            return String.format("%.2f MB", bytes / (double) mb);
+        }
+        return String.format("%.2f GB", bytes / (double) gb);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/task/SWEBenchTask.java b/src/main/java/com/taobao/profile/swebench/task/SWEBenchTask.java
new file mode 100644
index 0000000..ca5c379
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/task/SWEBenchTask.java
@@ -0,0 +1,277 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.task;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * SWE-bench任务定义
+ * 代表一个需要AI模型解决的软件工程问题
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class SWEBenchTask {
+    
+    /**
+     * 任务ID
+     */
+    private String taskId;
+    
+    /**
+     * GitHub仓库信息
+     */
+    private String repoOwner;
+    private String repoName;
+    private String repoBranch;
+    
+    /**
+     * Issue信息
+     */
+    private String issueNumber;
+    private String issueTitle;
+    private String issueDescription;
+    
+    /**
+     * 测试相关
+     */
+    private List<String> testCommands;
+    private List<String> failingTests;
+    
+    /**
+     * 预期的代码变更文件
+     */
+    private List<String> expectedFiles;
+    
+    /**
+     * 任务难度等级 (1-5)
+     */
+    private int difficultyLevel;
+    
+    /**
+     * 任务类型
+     */
+    private TaskType taskType;
+    
+    /**
+     * 额外的元数据
+     */
+    private Map<String, Object> metadata;
+    
+    /**
+     * 基准解决方案（用于对比）
+     */
+    private String baselinePatch;
+    
+    /**
+     * 任务创建时间
+     */
+    private long createTime;
+    
+    /**
+     * 预期完成时间（分钟）
+     */
+    private int expectedTimeMinutes;
+    
+    public enum TaskType {
+        BUG_FIX("bug_fix"),
+        FEATURE("feature"),
+        REFACTOR("refactor"),
+        TEST("test"),
+        DOCUMENTATION("documentation");
+        /** Lower-case identifier of this task type. */
+        private final String value;
+        
+        TaskType(String identifier) {
+            this.value = identifier;
+        }
+        /** Returns the lower-case identifier, e.g. "bug_fix" for BUG_FIX. */
+        public String getValue() {
+            return this.value;
+        }
+    }
+    
+    public SWEBenchTask() { // defaults: type BUG_FIX, difficulty 3, creation time = now
+        this.difficultyLevel = 3;
+        this.taskType = TaskType.BUG_FIX;
+        this.createTime = System.currentTimeMillis();
+    }
+    /** Creates a task with the default settings for the given task ID and repository. */
+    public SWEBenchTask(String taskId, String repoOwner, String repoName) {
+        this();
+        this.repoName = repoName;
+        this.repoOwner = repoOwner;
+        this.taskId = taskId;
+    }
+    
+    /**
+     * Builds the task prompt; the Branch line is left out when no branch is set (never "Branch: null").
+     */
+    public String generateTaskPrompt() {
+        StringBuilder prompt = new StringBuilder();
+        prompt.append("Repository: ").append(repoOwner).append("/").append(repoName).append("\n");
+        if (repoBranch != null && !repoBranch.isEmpty()) {
+            prompt.append("Branch: ").append(repoBranch).append("\n");
+        }
+        prompt.append("Issue #").append(issueNumber).append(": ").append(issueTitle).append("\n\n");
+        prompt.append("Description:\n").append(issueDescription).append("\n\n");
+        if (failingTests != null && !failingTests.isEmpty()) {
+            prompt.append("Failing tests:\n");
+            for (String test : failingTests) {
+                prompt.append("- ").append(test).append("\n");
+            }
+        }
+        return prompt.toString();
+    }
+    
+    /**
+     * Returns the repository's GitHub URL, https://github.com/OWNER/NAME.
+     */
+    public String getRepoUrl() {
+        return "https://github.com/" + repoOwner + "/" + repoName;
+    }
+    
+    /**
+     * Returns the issue URL: the repository URL followed by /issues/NUMBER.
+     */
+    public String getIssueUrl() {
+        return getRepoUrl() + "/issues/" + issueNumber;
+    }
+    
+    // Getters and setters
+    /** Returns the task ID. */
+    public String getTaskId() {
+        return taskId;
+    }
+    /** Sets the task ID. */
+    public void setTaskId(String taskId) {
+        this.taskId = taskId;
+    }
+    /** Returns the GitHub repository owner (first path segment of the repository URL). */
+    public String getRepoOwner() {
+        return repoOwner;
+    }
+    /** Sets the GitHub repository owner. */
+    public void setRepoOwner(String repoOwner) {
+        this.repoOwner = repoOwner;
+    }
+    /** Returns the GitHub repository name (second path segment of the repository URL). */
+    public String getRepoName() {
+        return repoName;
+    }
+    /** Sets the GitHub repository name. */
+    public void setRepoName(String repoName) {
+        this.repoName = repoName;
+    }
+    /** Returns the repository branch; null unless it was set, since no constructor assigns it. */
+    public String getRepoBranch() {
+        return repoBranch;
+    }
+    /** Sets the repository branch. */
+    public void setRepoBranch(String repoBranch) {
+        this.repoBranch = repoBranch;
+    }
+    /** Returns the issue number (kept as a string; it is appended to the issue URL). */
+    public String getIssueNumber() {
+        return issueNumber;
+    }
+    /** Sets the issue number. */
+    public void setIssueNumber(String issueNumber) {
+        this.issueNumber = issueNumber;
+    }
+    /** Returns the issue title. */
+    public String getIssueTitle() {
+        return issueTitle;
+    }
+    /** Sets the issue title. */
+    public void setIssueTitle(String issueTitle) {
+        this.issueTitle = issueTitle;
+    }
+    /** Returns the issue description. */
+    public String getIssueDescription() {
+        return issueDescription;
+    }
+    /** Sets the issue description. */
+    public void setIssueDescription(String issueDescription) {
+        this.issueDescription = issueDescription;
+    }
+    /** Returns the test command list; null unless it was set. */
+    public List<String> getTestCommands() {
+        return testCommands;
+    }
+    /** Sets the test command list (the reference is stored as-is, not copied). */
+    public void setTestCommands(List<String> testCommands) {
+        this.testCommands = testCommands;
+    }
+    /** Returns the failing tests that generateTaskPrompt() lists; null unless set. */
+    public List<String> getFailingTests() {
+        return failingTests;
+    }
+    /** Sets the failing tests (the reference is stored as-is, not copied). */
+    public void setFailingTests(List<String> failingTests) {
+        this.failingTests = failingTests;
+    }
+    /** Returns the files expected to be changed by the fix; null unless set. */
+    public List<String> getExpectedFiles() {
+        return expectedFiles;
+    }
+    /** Sets the files expected to be changed (the reference is stored as-is, not copied). */
+    public void setExpectedFiles(List<String> expectedFiles) {
+        this.expectedFiles = expectedFiles;
+    }
+    /** Returns the difficulty level (documented on the field as 1-5; defaults to 3). */
+    public int getDifficultyLevel() {
+        return difficultyLevel;
+    }
+    /** Sets the difficulty level; no range check is performed here. */
+    public void setDifficultyLevel(int difficultyLevel) {
+        this.difficultyLevel = difficultyLevel;
+    }
+    /** Returns the task type (defaults to BUG_FIX). */
+    public TaskType getTaskType() {
+        return taskType;
+    }
+    /** Sets the task type. */
+    public void setTaskType(TaskType taskType) {
+        this.taskType = taskType;
+    }
+    /** Returns the additional metadata map; null unless set. */
+    public Map<String, Object> getMetadata() {
+        return metadata;
+    }
+    /** Sets the additional metadata map (the reference is stored as-is, not copied). */
+    public void setMetadata(Map<String, Object> metadata) {
+        this.metadata = metadata;
+    }
+    /** Returns the baseline patch (the reference solution kept for comparison). */
+    public String getBaselinePatch() {
+        return baselinePatch;
+    }
+    /** Sets the baseline patch. */
+    public void setBaselinePatch(String baselinePatch) {
+        this.baselinePatch = baselinePatch;
+    }
+    /** Returns the creation time; the constructor initialises it from System.currentTimeMillis(). */
+    public long getCreateTime() {
+        return createTime;
+    }
+    /** Overrides the creation time. */
+    public void setCreateTime(long createTime) {
+        this.createTime = createTime;
+    }
+    /** Returns the expected completion time in minutes. */
+    public int getExpectedTimeMinutes() {
+        return expectedTimeMinutes;
+    }
+    /** Sets the expected completion time in minutes. */
+    public void setExpectedTimeMinutes(int expectedTimeMinutes) {
+        this.expectedTimeMinutes = expectedTimeMinutes;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/swebench/task/TaskResult.java b/src/main/java/com/taobao/profile/swebench/task/TaskResult.java
new file mode 100644
index 0000000..a041b77
--- /dev/null
+++ b/src/main/java/com/taobao/profile/swebench/task/TaskResult.java
@@ -0,0 +1,323 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.swebench.task;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * SWE-bench任务执行结果
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class TaskResult {
+    
+    /**
+     * 任务ID
+     */
+    private String taskId;
+    
+    /**
+     * 模型名称
+     */
+    private String modelName;
+    
+    /**
+     * 是否成功解决
+     */
+    private boolean success;
+    
+    /**
+     * 生成的补丁内容
+     */
+    private String generatedPatch;
+    
+    /**
+     * 测试结果
+     */
+    private TestResult testResult;
+    
+    /**
+     * 性能指标
+     */
+    private PerformanceMetrics performanceMetrics;
+    
+    /**
+     * 错误信息
+     */
+    private String errorMessage;
+    
+    /**
+     * 执行日志
+     */
+    private List<String> executionLogs;
+    
+    /**
+     * 额外的结果数据
+     */
+    private Map<String, Object> additionalData;
+    
+    /**
+     * Outcome of running a task's tests: counters, failed test names and raw output.
+     */
+    public static class TestResult {
+        private int totalTests;
+        private int passedTests;
+        private int failedTests;
+        private List<String> failedTestNames;
+        private String testOutput;
+        
+        public TestResult() {
+            // Nothing to initialise: the int counters already start at 0.
+        }
+        /** Returns passedTests / totalTests as a percentage, or 0 when totalTests is not positive. */
+        public double getPassRate() {
+            if (totalTests <= 0) {
+                return 0;
+            }
+            return (double) passedTests / totalTests * 100;
+        }
+        /** Returns the total number of tests. */
+        public int getTotalTests() {
+            return totalTests;
+        }
+        /** Sets the total number of tests. */
+        public void setTotalTests(int totalTests) {
+            this.totalTests = totalTests;
+        }
+        /** Returns the number of passed tests. */
+        public int getPassedTests() {
+            return passedTests;
+        }
+        /** Sets the number of passed tests. */
+        public void setPassedTests(int passedTests) {
+            this.passedTests = passedTests;
+        }
+        /** Returns the number of failed tests. */
+        public int getFailedTests() {
+            return failedTests;
+        }
+        /** Sets the number of failed tests. */
+        public void setFailedTests(int failedTests) {
+            this.failedTests = failedTests;
+        }
+        /** Returns the names of the failed tests; null unless set. */
+        public List<String> getFailedTestNames() {
+            return failedTestNames;
+        }
+        /** Sets the names of the failed tests (the reference is stored as-is). */
+        public void setFailedTestNames(List<String> failedTestNames) {
+            this.failedTestNames = failedTestNames;
+        }
+        /** Returns the captured test output; null unless set. */
+        public String getTestOutput() {
+            return testOutput;
+        }
+        /** Sets the captured test output. */
+        public void setTestOutput(String testOutput) {
+            this.testOutput = testOutput;
+        }
+    }
+    
+    /**
+     * Performance figures of one task run: timing, CPU, memory, API usage and cost.
+     */
+    public static class PerformanceMetrics {
+        private long startTime;
+        private long endTime;
+        private long executionTimeMillis;
+        private long cpuTimeMillis;
+        private long memoryUsedBytes;
+        private int apiCallCount;
+        private int tokenCount;
+        private double costEstimate;
+        /** Starts the clock: startTime is set to the current wall-clock time. */
+        public PerformanceMetrics() {
+            this.startTime = System.currentTimeMillis();
+        }
+        /** Stamps endTime with the current time and derives executionTimeMillis from startTime. */
+        public void recordEnd() {
+            long now = System.currentTimeMillis();
+            this.endTime = now;
+            this.executionTimeMillis = now - this.startTime;
+        }
+        /** Returns the start time in epoch milliseconds. */
+        public long getStartTime() {
+            return startTime;
+        }
+        /** Overrides the start time. */
+        public void setStartTime(long startTime) {
+            this.startTime = startTime;
+        }
+        /** Returns the end time in epoch milliseconds (0 until recorded or set). */
+        public long getEndTime() {
+            return endTime;
+        }
+        /** Sets the end time; executionTimeMillis is not recomputed. */
+        public void setEndTime(long endTime) {
+            this.endTime = endTime;
+        }
+        /** Returns the execution time in milliseconds. */
+        public long getExecutionTimeMillis() {
+            return executionTimeMillis;
+        }
+        /** Sets the execution time in milliseconds. */
+        public void setExecutionTimeMillis(long executionTimeMillis) {
+            this.executionTimeMillis = executionTimeMillis;
+        }
+        /** Returns the CPU time in milliseconds. */
+        public long getCpuTimeMillis() {
+            return cpuTimeMillis;
+        }
+        /** Sets the CPU time in milliseconds. */
+        public void setCpuTimeMillis(long cpuTimeMillis) {
+            this.cpuTimeMillis = cpuTimeMillis;
+        }
+        /** Returns the memory used, in bytes. */
+        public long getMemoryUsedBytes() {
+            return memoryUsedBytes;
+        }
+        /** Sets the memory used, in bytes. */
+        public void setMemoryUsedBytes(long memoryUsedBytes) {
+            this.memoryUsedBytes = memoryUsedBytes;
+        }
+        /** Returns the number of API calls. */
+        public int getApiCallCount() {
+            return apiCallCount;
+        }
+        /** Sets the number of API calls. */
+        public void setApiCallCount(int apiCallCount) {
+            this.apiCallCount = apiCallCount;
+        }
+        /** Returns the token count. */
+        public int getTokenCount() {
+            return tokenCount;
+        }
+        /** Sets the token count. */
+        public void setTokenCount(int tokenCount) {
+            this.tokenCount = tokenCount;
+        }
+        /** Returns the cost estimate (the reports print it with a $ prefix). */
+        public double getCostEstimate() {
+            return costEstimate;
+        }
+        /** Sets the cost estimate. */
+        public void setCostEstimate(double costEstimate) {
+            this.costEstimate = costEstimate;
+        }
+    }
+    
+    public TaskResult() { // starts with an empty test result and freshly started metrics
+        this.testResult = new TestResult();
+        this.performanceMetrics = new PerformanceMetrics();
+    }
+    /** Creates a result for the given task and model, with an empty test result and new metrics. */
+    public TaskResult(String taskId, String modelName) {
+        this();
+        this.modelName = modelName;
+        this.taskId = taskId;
+    }
+    
+    /**
+     * Builds a short multi-line summary of this result. Sections whose data is
+     * missing (metrics or test result replaced by null via the setters) are skipped.
+     */
+    public String generateSummary() {
+        StringBuilder summary = new StringBuilder();
+        summary.append("Task: ").append(taskId).append("\n");
+        summary.append("Model: ").append(modelName).append("\n");
+        summary.append("Success: ").append(success).append("\n");
+        if (performanceMetrics != null) {
+            summary.append("Execution Time: ").append(performanceMetrics.getExecutionTimeMillis()).append("ms\n");
+        }
+        if (testResult != null) {
+            summary.append("Test Pass Rate: ").append(String.format("%.2f%%", testResult.getPassRate())).append("\n");
+            summary.append("Tests: ").append(testResult.getPassedTests()).append("/").append(testResult.getTotalTests()).append("\n");
+        }
+        if (!success && errorMessage != null) {
+            summary.append("Error: ").append(errorMessage).append("\n");
+        }
+        return summary.toString();
+    }
+    
+    // Getters and setters
+    /** Returns the ID of the task this result belongs to. */
+    public String getTaskId() {
+        return taskId;
+    }
+    /** Sets the task ID. */
+    public void setTaskId(String taskId) {
+        this.taskId = taskId;
+    }
+    /** Returns the name of the model that produced this result. */
+    public String getModelName() {
+        return modelName;
+    }
+    /** Sets the model name. */
+    public void setModelName(String modelName) {
+        this.modelName = modelName;
+    }
+    /** Returns whether the task was solved successfully. */
+    public boolean isSuccess() {
+        return success;
+    }
+    /** Sets the success flag. */
+    public void setSuccess(boolean success) {
+        this.success = success;
+    }
+    /** Returns the generated patch content; null unless set. */
+    public String getGeneratedPatch() {
+        return generatedPatch;
+    }
+    /** Sets the generated patch content. */
+    public void setGeneratedPatch(String generatedPatch) {
+        this.generatedPatch = generatedPatch;
+    }
+    /** Returns the test result; the constructors create an empty one. */
+    public TestResult getTestResult() {
+        return testResult;
+    }
+    /** Replaces the test result; null is accepted and stored as-is. */
+    public void setTestResult(TestResult testResult) {
+        this.testResult = testResult;
+    }
+    /** Returns the performance metrics; the constructors create a fresh instance. */
+    public PerformanceMetrics getPerformanceMetrics() {
+        return performanceMetrics;
+    }
+    /** Replaces the performance metrics; null is accepted and stored as-is. */
+    public void setPerformanceMetrics(PerformanceMetrics performanceMetrics) {
+        this.performanceMetrics = performanceMetrics;
+    }
+    /** Returns the error message; null unless set. */
+    public String getErrorMessage() {
+        return errorMessage;
+    }
+    /** Sets the error message. */
+    public void setErrorMessage(String errorMessage) {
+        this.errorMessage = errorMessage;
+    }
+    /** Returns the execution log lines; null unless set. */
+    public List<String> getExecutionLogs() {
+        return executionLogs;
+    }
+    /** Sets the execution log lines (the reference is stored as-is, not copied). */
+    public void setExecutionLogs(List<String> executionLogs) {
+        this.executionLogs = executionLogs;
+    }
+    /** Returns the additional result data; null unless set. */
+    public Map<String, Object> getAdditionalData() {
+        return additionalData;
+    }
+    /** Sets the additional result data (the reference is stored as-is, not copied). */
+    public void setAdditionalData(Map<String, Object> additionalData) {
+        this.additionalData = additionalData;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/taobao/profile/thread/InnerSocketThread.java b/src/main/java/com/taobao/profile/thread/InnerSocketThread.java
index afb6722..b6c7be1 100644
--- a/src/main/java/com/taobao/profile/thread/InnerSocketThread.java
+++ b/src/main/java/com/taobao/profile/thread/InnerSocketThread.java
@@ -19,6 +19,7 @@
 
 import com.taobao.profile.Manager;
 import com.taobao.profile.runtime.MethodCache;
+import com.taobao.profile.thread.SWEBenchThread;
 
 /**
  * 对外提供Socket开关
@@ -53,6 +54,10 @@ public void run() {
 					write(child.getOutputStream());
 				} else if (Manager.FLUSHMETHOD.equals(command)) {
 					MethodCache.flushMethodData();
+				} else if (command != null && command.startsWith("swebench_")) {
+					// 处理SWE-bench相关命令
+					String response = SWEBenchThread.getInstance().handleCommand(command);
+					writeResponse(child.getOutputStream(), response);
 				} else {
 					Manager.instance().setSwitchFlag(false);
 				}
@@ -111,6 +116,20 @@ private void write(OutputStream os) throws IOException {
 		out.write('\r');
 		out.flush();
 	}
+	
+	/**
+	 * Sends {@code response} as UTF-8 bytes followed by a '\r' terminator, then flushes.
+	 * @param os stream the response is written to
+	 * @param response text to send; must not be null
+	 * @throws IOException if encoding or writing fails
+	 */
+	private void writeResponse(OutputStream os, String response) throws IOException {
+		BufferedOutputStream out = new BufferedOutputStream(os);
+		// Fixed charset instead of the platform default; NOTE(review): the reader must decode UTF-8 too - confirm client.
+		out.write(response.getBytes("UTF-8"));
+		out.write('\r');
+		out.flush();
+	}
 
     /**
      * 调试使用
diff --git a/src/main/java/com/taobao/profile/thread/SWEBenchThread.java b/src/main/java/com/taobao/profile/thread/SWEBenchThread.java
new file mode 100644
index 0000000..66dd8d1
--- /dev/null
+++ b/src/main/java/com/taobao/profile/thread/SWEBenchThread.java
@@ -0,0 +1,120 @@
+/**
+ * (C) 2011-2012 Alibaba Group Holding Limited.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * 
+ */
+package com.taobao.profile.thread;
+
+import com.taobao.profile.Manager;
+import com.taobao.profile.swebench.SWEBenchManager;
+
+/**
+ * SWE-bench集成线程
+ * 负责处理来自InnerSocketThread的SWE-bench相关命令
+ * 
+ * @author TProfiler Team
+ * @since 2025-1
+ */
+public class SWEBenchThread {
+    
+    private static SWEBenchThread instance = new SWEBenchThread();
+    
+    private SWEBenchThread() {
+    }
+    
+    public static SWEBenchThread getInstance() {
+        return instance;
+    }
+    
+    /**
+     * 处理SWE-bench命令
+     * 
+     * @param command 命令
+     * @return 响应结果
+     */
+    public String handleCommand(String command) {
+        if (command == null) {
+            return "ERROR: 命令为空";
+        }
+        
+        String[] parts = command.split(":");
+        String action = parts[0];
+        
+        try {
+            if (Manager.SWEBENCH_START.equals(action)) {
+                if (parts.length < 2) {
+                    return "ERROR: 缺少模型名称参数";
+                }
+                return startSWEBench(parts[1]);
+                
+            } else if (Manager.SWEBENCH_STOP.equals(action)) {
+                return stopSWEBench();
+                
+            } else if (Manager.SWEBENCH_STATUS.equals(action)) {
+                return getSWEBenchStatus();
+                
+            } else {
+                return "ERROR: 未知的SWE-bench命令: " + action;
+            }
+        } catch (Exception e) {
+            return "ERROR: " + e.getMessage();
+        }
+    }
+    
+    /**
+     * 启动SWE-bench评测
+     */
+    private String startSWEBench(String modelName) {
+        try {
+            SWEBenchManager manager = SWEBenchManager.getInstance();
+            manager.initialize();
+            
+            // 在新线程中启动评测，避免阻塞
+            Thread benchmarkThread = new Thread(new Runnable() {
+                @Override
+                public void run() {
+                    manager.startBenchmark(modelName);
+                }
+            });
+            benchmarkThread.setName("TProfiler-SWEBench-" + modelName);
+            benchmarkThread.setDaemon(true);
+            benchmarkThread.start();
+            
+            return "OK: SWE-bench评测已启动，模型: " + modelName;
+            
+        } catch (Exception e) {
+            return "ERROR: 启动失败 - " + e.getMessage();
+        }
+    }
+    
+    /**
+     * 停止SWE-bench评测
+     */
+    private String stopSWEBench() {
+        try {
+            SWEBenchManager manager = SWEBenchManager.getInstance();
+            manager.stopBenchmark();
+            return "OK: SWE-bench评测已停止";
+            
+        } catch (Exception e) {
+            return "ERROR: 停止失败 - " + e.getMessage();
+        }
+    }
+    
+    /**
+     * 获取SWE-bench状态
+     */
+    private String getSWEBenchStatus() {
+        try {
+            SWEBenchManager manager = SWEBenchManager.getInstance();
+            String status = manager.getStatus();
+            return "OK: SWE-bench状态 - " + status;
+            
+        } catch (Exception e) {
+            return "ERROR: 获取状态失败 - " + e.getMessage();
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/resources/swebench.properties b/src/main/resources/swebench.properties
new file mode 100644
index 0000000..f33a823
--- /dev/null
+++ b/src/main/resources/swebench.properties
@@ -0,0 +1,27 @@
+# SWE-bench 配置文件
+
+# 基本配置
+swebench.parallel.tasks=4
+swebench.task.timeout=30
+swebench.max.retry=3
+
+# 路径配置
+swebench.report.path=${user.home}/swebench-reports
+swebench.task.path=${user.home}/swebench-tasks
+
+# 功能开关
+swebench.enable.profiling=true
+swebench.save.intermediate=true
+
+# Docker配置
+swebench.docker.image=swebench/eval:latest
+
+# 数据集配置
+# 可选值: full, lite, verified
+swebench.dataset.type=lite
+
+# 模型API配置
+# 如果使用云端模型，请取消下面两行的注释并填写实际的API地址和密钥（切勿将真实密钥提交到版本库）
+# swebench.model.api.url=https://api.openai.com/v1/completions
+# swebench.model.api.key=your-api-key-here
+swebench.model.max.tokens=4096
\ No newline at end of file