From 6df8c22f1ed6f374bc067ad972ff0ec7e6b78220 Mon Sep 17 00:00:00 2001 From: echobt Date: Wed, 18 Feb 2026 20:22:00 +0000 Subject: [PATCH 1/3] feat(cli): add RPC methods for agent journey, submission history, stats, and decay --- cli/src/rpc.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/cli/src/rpc.rs b/cli/src/rpc.rs index 350cd15c..5d373906 100644 --- a/cli/src/rpc.rs +++ b/cli/src/rpc.rs @@ -214,4 +214,55 @@ impl RpcClient { .map(|r| ChallengeInfo { id: r.id }) .collect()) } + + pub async fn fetch_agent_journey( + &self, + challenge_id: &str, + hotkey: &str, + ) -> anyhow::Result<serde_json::Value> { + let params = serde_json::json!({ + "challengeId": challenge_id, + "method": "GET", + "path": format!("/agent/{}/journey", hotkey) + }); + let result = self.call("challenge_call", params).await?; + Ok(result) + } + + pub async fn fetch_submission_history( + &self, + challenge_id: &str, + hotkey: &str, + ) -> anyhow::Result<serde_json::Value> { + let params = serde_json::json!({ + "challengeId": challenge_id, + "method": "GET", + "path": format!("/agent/{}/logs", hotkey) + }); + let result = self.call("challenge_call", params).await?; + Ok(result) + } + + pub async fn fetch_stats(&self, challenge_id: &str) -> anyhow::Result<serde_json::Value> { + let params = serde_json::json!({ + "challengeId": challenge_id, + "method": "GET", + "path": "/stats" + }); + let result = self.call("challenge_call", params).await?; + Ok(result) + } + + pub async fn fetch_decay_status( + &self, + challenge_id: &str, + ) -> anyhow::Result<serde_json::Value> { + let params = serde_json::json!({ + "challengeId": challenge_id, + "method": "GET", + "path": "/decay" + }); + let result = self.call("challenge_call", params).await?; + Ok(result) + } } From 0a0e389d2da970390256b0e7c09ba9da59b5f806 Mon Sep 17 00:00:00 2001 From: echobt Date: Wed, 18 Feb 2026 20:28:14 +0000 Subject: [PATCH 2/3] feat(wasm): add submission, LLM review, AST validation, timeout modules and enhance existing modules - Add types: 
SubmissionName, SubmissionVersion, LlmReviewResult, AstReviewResult, EvaluationStatus, TopAgentState, LeaderboardEntry, StatsResponse, TimeoutConfig, WhitelistConfig, LlmMessage, LlmRequest, LlmResponse, WasmRouteRequest - Create submission.rs: name registration, versioned submissions, history - Create llm_review.rs: LLM code review via host_http_post, reviewer selection, result storage/aggregation - Create ast_validation.rs: Python code validation with whitelist config, forbidden builtins, dangerous patterns, import checking - Create timeout_handler.rs: timeout config, assignment tracking, replacement selection - Enhance scoring.rs: top agent state tracking, epoch decay integration, remove dead_code allow - Enhance dataset.rs: consensus logic, random index generation, proposals - Enhance agent_storage.rs: evaluation status storage, remove dead_code allows - Rewrite routes.rs: 24 route definitions, functional handlers for all endpoints - Update lib.rs: integrate new modules into evaluate(), add routes/handle_route methods, store scores and submission records --- README.md | 162 +++++++++++++--- docs/architecture.md | 363 +++++++++++++++++++++++++++++++++++ docs/miner/how-to-mine.md | 312 ++++++++++++++++++++++++++++++ docs/miner/submission.md | 188 ++++++++++++++++++ wasm/src/agent_storage.rs | 25 ++- wasm/src/ast_validation.rs | 134 +++++++++++++ wasm/src/dataset.rs | 121 +++++++++++- wasm/src/lib.rs | 156 ++++++++++++--- wasm/src/llm_review.rs | 192 +++++++++++++++++++ wasm/src/routes.rs | 370 ++++++++++++++++++++++++++++++++++-- wasm/src/scoring.rs | 70 ++++++- wasm/src/submission.rs | 102 ++++++++++ wasm/src/timeout_handler.rs | 102 ++++++++++ wasm/src/types.rs | 207 ++++++++++++++++++++ 14 files changed, 2420 insertions(+), 84 deletions(-) create mode 100644 docs/architecture.md create mode 100644 docs/miner/how-to-mine.md create mode 100644 docs/miner/submission.md create mode 100644 wasm/src/ast_validation.rs create mode 100644 wasm/src/llm_review.rs create 
mode 100644 wasm/src/submission.rs create mode 100644 wasm/src/timeout_handler.rs diff --git a/README.md b/README.md index 1e48876f..2e1e43fc 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ -Term Challenge is a WASM evaluation module for AI agents on the Bittensor network. It runs inside [platform-v2](https://github.com/PlatformNetwork/platform-v2) validators to evaluate miner submissions against SWE-bench tasks. +Term Challenge is a WASM evaluation module for AI agents on the Bittensor network. It runs inside [platform-v2](https://github.com/PlatformNetwork/platform-v2) validators to evaluate miner submissions against SWE-bench tasks. Miners submit Python agent packages that autonomously solve software engineering issues, and the network scores them through a multi-stage review pipeline including LLM-based code review and AST structural validation. --- @@ -22,6 +22,7 @@ flowchart LR Miner[Miner] -->|Submit Agent ZIP| RPC[Validator RPC] RPC --> Validators[Validator Network] Validators --> WASM[term-challenge WASM] + WASM --> Storage[(Blockchain Storage)] Validators --> Executor[term-executor] Executor -->|Task Results| Validators Validators -->|Scores + Weights| BT[Bittensor Chain] @@ -31,30 +32,97 @@ flowchart LR --- -## Evaluation Flow +## Evaluation Pipeline ```mermaid sequenceDiagram participant M as Miner participant V as Validators + participant LLM as LLM Reviewers (×3) + participant AST as AST Reviewers (×3) participant W as WASM Module participant E as term-executor participant BT as Bittensor M->>V: Submit agent zip + metadata V->>W: validate(submission) - W-->>V: approved (>50% consensus) + W-->>V: Approved (>50% consensus) + V->>LLM: Assign LLM code review + V->>AST: Assign AST structural review + LLM-->>V: LLM review scores + AST-->>V: AST review scores V->>E: Execute agent on SWE-bench tasks E-->>V: Task results + scores V->>W: evaluate(results) W-->>V: Aggregate score + weight V->>V: Store agent code & logs - V->>V: Log consensus (>50% 
agreement) + V->>V: Log consensus (>50% hash agreement) V->>BT: Submit weights at epoch boundary ``` --- +## Validator Assignment + +```mermaid +flowchart TB + Sub[New Submission] --> Seed[Deterministic Seed from submission_id] + Seed --> Select[Select 6 Validators] + Select --> LLM[3 LLM Reviewers] + Select --> AST[3 AST Reviewers] + LLM --> LR1[LLM Reviewer 1] + LLM --> LR2[LLM Reviewer 2] + LLM --> LR3[LLM Reviewer 3] + AST --> AR1[AST Reviewer 1] + AST --> AR2[AST Reviewer 2] + AST --> AR3[AST Reviewer 3] + LR1 & LR2 & LR3 -->|Timeout?| TD1{Responded?} + AR1 & AR2 & AR3 -->|Timeout?| TD2{Responded?} + TD1 -->|No| Rep1[Replacement Validator] + TD1 -->|Yes| Agg[Result Aggregation] + TD2 -->|No| Rep2[Replacement Validator] + TD2 -->|Yes| Agg + Rep1 --> Agg + Rep2 --> Agg + Agg --> Score[Final Score] +``` + +--- + +## Submission Flow + +```mermaid +flowchart LR + Register[Register Name] -->|First-register-owns| Name[Submission Name] + Name --> Version[Auto-increment Version] + Version --> Pack[Package Agent ZIP ≤ 1MB] + Pack --> Sign[Sign with sr25519] + Sign --> Submit[Submit via RPC] + Submit --> RateCheck{Epoch Rate Limit OK?} + RateCheck -->|No: < 3 epochs since last| Reject[Rejected] + RateCheck -->|Yes| Validate[WASM validate] + Validate --> Consensus{>50% Validator Approval?} + Consensus -->|No| Reject + Consensus -->|Yes| Evaluate[Evaluation Pipeline] + Evaluate --> Store[Store Code + Hash + Logs] +``` + +--- + +## Decay Mechanism + +```mermaid +flowchart LR + Top[Top Score Achieved] --> Grace[72h Grace Period] + Grace -->|Within grace| Full[100% Weight Retained] + Grace -->|After grace| Decay[Exponential Decay Begins] + Decay --> Half[50% per 24h half-life] + Half --> Min[Decay to 0.0 min multiplier] + Min --> Burn[Weight Burns to UID 0] +``` + +--- + ## CLI Data Flow ```mermaid @@ -101,11 +169,37 @@ flowchart TB --- +## Route Architecture + +```mermaid +flowchart LR + Client[Client] -->|JSON-RPC| RPC[RPC Server] + RPC -->|challenge_call| WE[WASM Executor] 
+ WE -->|handle_route request| WM[WASM Module] + WM --> Router{Route Match} + Router --> LB["/leaderboard"] + Router --> Subs["/submissions"] + Router --> DS["/dataset"] + Router --> Stats["/stats"] + Router --> Agent["/agent/:hotkey/code"] + LB & Subs & DS & Stats & Agent --> Storage[(Storage)] + Storage --> Response[Serialized Response] + Response --> WE + WE --> RPC + RPC --> Client +``` + +--- + ## Features - **WASM Module**: Compiles to `wasm32-unknown-unknown`, loaded by platform-v2 validators - **SWE-bench Evaluation**: Tasks selected from HuggingFace CortexLM/swe-bench datasets -- **LLM Judge**: Integrated LLM scoring via platform-v2 host functions +- **LLM Code Review**: 3 validators perform LLM-based code review via host functions +- **AST Structural Validation**: 3 validators perform AST-based structural analysis +- **Submission Versioning**: Auto-incrementing versions with full history tracking +- **Timeout Handling**: Unresponsive reviewers are replaced with alternate validators +- **Route Handlers**: WASM-native route handling for leaderboard, submissions, dataset, and agent data - **Epoch Rate Limiting**: 1 submission per 3 epochs per miner - **Top Agent Decay**: 72h grace period, 50% daily decay to 0 weight - **P2P Dataset Consensus**: Validators collectively select 50 evaluation tasks @@ -137,22 +231,29 @@ This repository contains the WASM evaluation module and a native CLI for monitor ``` term-challenge/ -├── wasm/ # WASM evaluation module +├── wasm/ # WASM evaluation module │ └── src/ -│ ├── lib.rs # Challenge trait implementation -│ ├── types.rs # Submission, task, and config types -│ ├── scoring.rs # Score aggregation and decay -│ ├── tasks.rs # Active dataset management -│ ├── dataset.rs # Dataset selection consensus -│ ├── routes.rs # RPC route definitions -│ └── agent_storage.rs # Agent code & log storage functions -├── cli/ # Native TUI monitoring tool │ └── src/ -│ ├── main.rs # Entry point, event loop -│ ├── app.rs # Application state -│ ├── ui.rs # Ratatui UI rendering -│ └── rpc.rs # JSON-RPC 2.0 client -├── AGENTS.md # Development guide +│ ├── lib.rs # Challenge trait implementation (validate + evaluate) +│ ├── types.rs # Submission, 
task, config, route, and log types +│ ├── scoring.rs # Score aggregation, decay, and weight calculation +│ ├── tasks.rs # Active dataset management and history +│ ├── dataset.rs # Dataset selection and P2P consensus logic +│ ├── routes.rs # WASM route definitions for RPC (handle_route) +│ └── agent_storage.rs # Agent code, hash, and log storage functions +├── cli/ # Native TUI monitoring tool │ └── src/ -│ ├── main.rs # Entry point, event loop -│ ├── app.rs # Application state -│ ├── ui.rs # Ratatui UI rendering -│ └── rpc.rs # JSON-RPC 2.0 client -├── AGENTS.md # Development guide +│ ├── main.rs # Entry point, event loop +│ ├── app.rs # Application state +│ ├── ui.rs # Ratatui UI rendering +│ └── rpc.rs # JSON-RPC 2.0 client +├── docs/ +│ ├── architecture.md # System architecture and internals +│ ├── miner/ +│ │ ├── how-to-mine.md # Complete miner guide +│ │ └── submission.md # Submission format and review process +│ └── validator/ +│ └── setup.md # Validator setup and operations +├── AGENTS.md # Development guide └── README.md ``` @@ -162,11 +263,15 @@ term-challenge/ 1. Miners submit zip packages with agent code and SWE-bench task results 2. Platform-v2 validators load this WASM module -3. `validate()` checks signatures, epoch rate limits, and Basilica metadata -4. `evaluate()` scores task results and applies LLM judge scoring -5. Agent code and hash are stored on-chain for auditability (≤ 1MB per package) -6. Evaluation logs are proposed and validated via P2P consensus (>50% hash agreement) -7. Scores are aggregated via P2P consensus and submitted to Bittensor +3. `validate()` checks signatures, epoch rate limits, package size, and Basilica metadata +4. **6 review validators** are deterministically selected (3 LLM + 3 AST) to review the submission +5. LLM reviewers score code quality; AST reviewers validate structural integrity +6. Timed-out reviewers are automatically replaced with alternate validators +7. 
`evaluate()` scores task results, applies LLM judge scoring, and computes aggregate weights +8. Agent code and hash are stored on-chain for auditability (≤ 1MB per package) +9. Evaluation logs are proposed and validated via P2P consensus (>50% hash agreement) +10. Scores are aggregated via P2P consensus and submitted to Bittensor at epoch boundaries +11. Top agents enter a decay cycle: 72h grace → 50% daily decay → weight burns to UID 0 --- @@ -190,6 +295,15 @@ term-cli --hotkey 5GrwvaEF... --tab leaderboard --- +## Documentation + +- [Architecture Overview](docs/architecture.md) — System components, host functions, P2P messages, storage schema +- [Miner Guide](docs/miner/how-to-mine.md) — How to build and submit agents +- [Submission Guide](docs/miner/submission.md) — Naming, versioning, and review process +- [Validator Setup](docs/validator/setup.md) — Hardware requirements, configuration, and operations + +--- + ## License Apache-2.0 diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 00000000..321d891e --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,363 @@ +# Architecture Overview + +This document describes the internal architecture of Term Challenge, including system components, the WASM module design, host function surface, P2P message types, and storage schema. + +--- + +## System Components + +```mermaid +flowchart TB + subgraph Miner + Agent[Python Agent] + CLI[term-cli TUI] + end + + subgraph Platform-v2 Validator + RPC[RPC Server] + P2P[P2P Consensus
libp2p gossipsub + DHT] + WR[WASM Runtime] + WASM[term-challenge.wasm] + Exec[term-executor] + BT[Bittensor Integration] + Store[Blockchain Storage] + end + + subgraph Bittensor + Chain[Bittensor Chain] + end + + Agent -->|ZIP submission| RPC + CLI -->|JSON-RPC| RPC + RPC --> WR + WR --> WASM + WASM -->|host_storage_get/set| Store + WASM -->|host_http_post| Exec + P2P <-->|Consensus messages| P2P + WR --> P2P + BT -->|Weights| Chain + P2P --> BT +``` + +--- + +## WASM Module Architecture + +```mermaid +flowchart TB + subgraph "term-challenge-wasm (no_std)" + Lib[lib.rs
Challenge trait impl] + Types[types.rs
Submission, TaskResult,
ChallengeParams, DecayParams] + Scoring[scoring.rs
Aggregate scoring,
decay, weight calc] + Tasks[tasks.rs
Active dataset
management] + Dataset[dataset.rs
Dataset selection
consensus logic] + Routes[routes.rs
Route definitions
for RPC] + Storage[agent_storage.rs
Code, hash, log
storage functions] + end + + Lib --> Types + Lib --> Scoring + Lib --> Storage + Lib --> Tasks + Tasks --> Dataset + Lib --> Routes + + subgraph "Host Functions (platform-v2)" + HStorage[host_storage_get/set] + HHttp[host_http_post] + HEpoch[host_consensus_get_epoch] + end + + Lib --> HStorage + Lib --> HHttp + Lib --> HEpoch +``` + +### Module Responsibilities + +| Module | Purpose | +| --- | --- | +| `lib.rs` | Implements the `Challenge` trait: `validate()`, `evaluate()`, `tasks()`, `configure()` | +| `types.rs` | All data structures: `Submission`, `TaskResult`, `ChallengeParams`, `DecayParams`, `AgentLogs`, `RouteDefinition` | +| `scoring.rs` | Score aggregation by difficulty, pass rate calculation, decay application, weight conversion | +| `tasks.rs` | Active dataset storage/retrieval, dataset history management | +| `dataset.rs` | P2P dataset consensus logic (reserved for future implementation) | +| `routes.rs` | Route definitions for challenge RPC endpoints | +| `agent_storage.rs` | Agent code, hash, and log storage with size limits | + +--- + +## Host Function Surface + +These are the host functions available to WASM challenge modules, provided by `platform-challenge-sdk-wasm`. Term Challenge uses a subset of these. 
+ +### Network Functions (`platform_network`) + +| Function | Signature | Description | Used by Term Challenge | +| --- | --- | --- | --- | +| `host_http_get` | `(request: &[u8]) → Result<Vec<u8>, i32>` | HTTP GET request | No | +| `host_http_post` | `(request: &[u8], body: &[u8]) → Result<Vec<u8>, i32>` | HTTP POST request | Yes (LLM judge) | +| `host_dns_resolve` | `(request: &[u8]) → Result<Vec<u8>, i32>` | DNS resolution | No | + +### Storage Functions (`platform_storage`) + +| Function | Signature | Description | Used by Term Challenge | +| --- | --- | --- | --- | +| `host_storage_get` | `(key: &[u8]) → Result<Vec<u8>, i32>` | Read from blockchain storage | Yes | +| `host_storage_set` | `(key: &[u8], value: &[u8]) → Result<(), i32>` | Write to blockchain storage | Yes | + +### Terminal Functions (`platform_terminal`) + +| Function | Signature | Description | Used by Term Challenge | +| --- | --- | --- | --- | +| `host_terminal_exec` | `(request: &[u8]) → Result<Vec<u8>, i32>` | Execute terminal command | No | +| `host_read_file` | `(path: &[u8]) → Result<Vec<u8>, i32>` | Read file contents | No | +| `host_write_file` | `(path: &[u8], data: &[u8]) → Result<(), i32>` | Write file contents | No | +| `host_list_dir` | `(path: &[u8]) → Result<Vec<u8>, i32>` | List directory contents | No | +| `host_get_time` | `() → i64` | Get current timestamp | No | +| `host_random_seed` | `(buf: &mut [u8]) → Result<(), i32>` | Fill buffer with random bytes | No | + +### Sandbox Functions (`platform_sandbox`) + +| Function | Signature | Description | Used by Term Challenge | +| --- | --- | --- | --- | +| `host_sandbox_exec` | `(request: &[u8]) → Result<Vec<u8>, i32>` | Execute in sandbox | No | +| `host_get_timestamp` | `() → i64` | Get sandbox timestamp | No | +| `host_log` | `(level: u8, msg: &str) → ()` | Log a message | No | + +### LLM Functions (`platform_llm`) + +| Function | Signature | Description | Used by Term Challenge | +| --- | --- | --- | --- | +| `host_llm_chat_completion` | `(request: &[u8]) → Result<Vec<u8>, i32>` | LLM chat 
completion | No (uses HTTP post instead) | +| `host_llm_is_available` | `() → bool` | Check LLM availability | No | + +### Consensus Functions (`platform_consensus`) + +| Function | Signature | Description | Used by Term Challenge | +| --- | --- | --- | --- | +| `host_consensus_get_epoch` | `() → i64` | Get current epoch number | Yes | +| `host_consensus_get_validators` | `() → Result<Vec<u8>, i32>` | Get validator list | No | +| `host_consensus_propose_weight` | `(uid: i32, weight: i32) → Result<(), i32>` | Propose a weight | No | +| `host_consensus_get_votes` | `() → Result<Vec<u8>, i32>` | Get consensus votes | No | +| `host_consensus_get_state_hash` | `() → Result<[u8; 32], i32>` | Get state hash | No | +| `host_consensus_get_submission_count` | `() → i32` | Get submission count | No | +| `host_consensus_get_block_height` | `() → i64` | Get block height | No | + +--- + +## WASM ABI Exports + +The `register_challenge!` macro exports these functions from the WASM module: + +| Export | Signature | Description | +| --- | --- | --- | +| `evaluate` | `(agent_ptr: i32, agent_len: i32) → i64` | Evaluate a submission, returns packed ptr+len | +| `validate` | `(agent_ptr: i32, agent_len: i32) → i32` | Validate a submission, returns 0 or 1 | +| `get_name` | `() → i32` | Return challenge name | +| `get_version` | `() → i32` | Return challenge version | +| `generate_task` | `(params_ptr: i32, params_len: i32) → i64` | Generate a task | +| `setup_environment` | `(config_ptr: i32, config_len: i32) → i32` | Set up environment | +| `get_tasks` | `() → i64` | Return active task definitions | +| `configure` | `(config_ptr: i32, config_len: i32) → i32` | Configure challenge with dataset | +| `get_routes` | `() → i64` | Return route definitions | +| `handle_route` | `(req_ptr: i32, req_len: i32) → i64` | Handle an incoming route request | +| `alloc` | `(size: usize) → *mut u8` | Allocate memory in WASM linear memory | + +--- + +## P2P Message Types + +These message types are used for 
inter-validator communication over libp2p gossipsub. Term Challenge interacts with these through platform-v2's consensus layer. + +### Consensus Messages + +| Message | Description | +| --- | --- | +| `Proposal` | Leader proposes a state transition (view, sequence, content, signature) | +| `PrePrepare` | Leader broadcasts after receiving proposal | +| `Prepare` | Validators acknowledge pre-prepare (2f+1 required) | +| `Commit` | Validators commit to the proposal | +| `ViewChange` | Request new leader election | +| `NewView` | New leader announces with collected view changes | + +### Challenge Evaluation Messages + +| Message | Description | +| --- | --- | +| `Submission` | Agent code submission for evaluation (submission_id, challenge_id, miner, agent_hash) | +| `Evaluation` | Evaluation result from a validator (score, metrics, execution_time) | +| `WeightVote` | Weight vote for epoch finalization (uid → weight vector) | + +### Challenge Lifecycle Messages + +| Message | Description | +| --- | --- | +| `JobClaim` | Validator claims evaluation work capacity | +| `JobAssignment` | Assigns a submission evaluation to a validator | +| `DataRequest` | Request challenge-related data from peers | +| `DataResponse` | Response containing requested challenge data | +| `TaskProgress` | Progress update during evaluation (task_index, total_tasks, progress_pct) | +| `TaskResult` | Result of a single task evaluation (passed, score, output) | +| `LeaderboardRequest` | Request leaderboard data with pagination | +| `LeaderboardResponse` | Response with serialized leaderboard entries | +| `ChallengeUpdate` | Update notification for challenge configuration | +| `StorageProposal` | Propose storing a key-value pair in consensus storage | +| `StorageVote` | Vote on a storage proposal (approve/reject) | + +### Review Messages + +| Message | Description | +| --- | --- | +| `ReviewAssignment` | Assigns review validators for a submission (3 LLM + 3 AST, deterministic seed) | +| 
`ReviewDecline` | Validator declines or times out on a review assignment | +| `ReviewResult` | Review result with score and details (review_type: Llm or Ast) | + +### Agent Log Messages + +| Message | Description | +| --- | --- | +| `AgentLogProposal` | Proposes agent evaluation logs for P2P consensus (logs_hash, logs_data ≤ 256KB) | + +### Network Maintenance Messages + +| Message | Description | +| --- | --- | +| `Heartbeat` | Validator presence signal (state_hash, sequence, stake) | +| `PeerAnnounce` | Peer discovery with multiaddresses and peer_id | + +### State Sync Messages + +| Message | Description | +| --- | --- | +| `StateRequest` | Request state synchronization from peers | +| `StateResponse` | Response with state data and optional Merkle proof | + +--- + +## Storage Key Schema + +Term Challenge uses the following storage keys via `host_storage_get` and `host_storage_set`: + +### Agent Storage Keys + +| Key Format | Content | Max Size | Module | +| --- | --- | --- | --- | +| `agent_code:<hotkey>:<epoch>` | Raw ZIP package bytes | 1 MB (1,048,576 bytes) | `agent_storage` | +| `agent_hash:<hotkey>:<epoch>` | Agent package hash string | Unbounded | `agent_storage` | +| `agent_logs:<hotkey>:<epoch>` | Serialized `AgentLogs` struct | 256 KB (262,144 bytes) | `agent_storage` | + +### Submission Tracking Keys + +| Key Format | Content | Size | Module | +| --- | --- | --- | --- | +| `last_submission:<hotkey>` | Last submission epoch (u64 LE) | 8 bytes | `lib` | + +### Dataset Keys + +| Key Format | Content | Size | Module | +| --- | --- | --- | --- | +| `active_dataset` | Serialized `Vec<TaskDefinition>` | Variable | `tasks` | +| `dataset_history` | Serialized `Vec` (max 100 entries) | Variable | `tasks` | + +### Key Encoding + +- **Hotkey**: Raw bytes of the miner's hotkey string (`miner_hotkey.as_bytes()`) +- **Epoch**: Little-endian encoded `u64` (`epoch.to_le_bytes()`) +- **Separator**: ASCII colon (`:`, byte `0x3A`) + +--- + +## Data Types + +### Core Submission Types + +``` +Submission { + agent_hash: String, + 
miner_hotkey: String, + signature: Vec<u8>, + epoch: u64, + package_zip: Vec<u8>, + basilica_instance: String, + executor_url: String, + executor_token: String, + task_results: Vec<TaskResult>, +} + +TaskResult { + task_id: String, + passed: bool, + score: f64, + execution_time_ms: u64, + test_output: String, + agent_output: String, + error: Option<String>, +} +``` + +### Configuration Types + +``` +ChallengeParams { + tasks: Vec<TaskDefinition>, + llm_judge_url: Option<String>, + decay_params: Option<DecayParams>, + active_dataset: Option<Vec<TaskDefinition>>, +} + +DecayParams { + grace_period_hours: u64, // default: 72 + half_life_hours: u64, // default: 24 + min_multiplier: f64, // default: 0.0 +} + +TaskDefinition { + id: String, + name: String, + repo: String, + base_commit: String, + difficulty: Difficulty, // Easy | Medium | Hard + timeout_secs: u64, +} +``` + +### Route Types + +``` +WasmRouteRequest { + method: String, + path: String, + params: Vec<(String, String)>, + query: Vec<(String, String)>, + body: Vec<u8>, + auth_hotkey: Option<String>, +} + +WasmRouteResponse { + status: u16, + body: Vec<u8>, +} +``` + +--- + +## Serialization + +- **WASM ↔ Host**: `bincode` with fixed-int encoding and size limits +- **P2P Messages**: `bincode` serialization (max 16 MB per message) +- **Storage Values**: `bincode` serialization +- **RPC**: JSON-RPC 2.0 over HTTP + +### Size Limits + +| Context | Limit | Constant | +| --- | --- | --- | +| Submission deserialization | 64 MB | `MAX_SUBMISSION_SIZE` | +| Challenge params deserialization | 4 MB | `MAX_PARAMS_SIZE` | +| LLM response deserialization | 1 MB | `MAX_LLM_RESPONSE_SIZE` | +| P2P message | 16 MB | `MAX_P2P_MESSAGE_SIZE` | +| Agent package ZIP | 1 MB | `MAX_AGENT_PACKAGE_SIZE` | +| Agent logs | 256 KB | `MAX_LOG_SIZE` | +| Task output preview | 4 KB | `MAX_TASK_OUTPUT_PREVIEW` | +| Max tasks per submission | 256 | `MAX_TASKS` | diff --git a/docs/miner/how-to-mine.md b/docs/miner/how-to-mine.md new file mode 100644 index 00000000..ea8c512c --- /dev/null +++ b/docs/miner/how-to-mine.md @@ -0,0 +1,312 @@ +# How 
to Mine on Term Challenge + +This guide walks you through building and submitting an AI agent to the Term Challenge subnet on Bittensor. + +--- + +## Overview + +```mermaid +flowchart LR + Dev[Develop Agent] --> Test[Test Locally] + Test --> Pack[Package as ZIP] + Pack --> Submit[Submit via CLI] + Submit --> RPC[Validator RPC] + RPC --> Review[LLM + AST Review] + Review --> Eval[SWE-bench Evaluation] + Eval --> Score[Score + Weight] + Score --> TAO[TAO Rewards] +``` + +Miners create Python agents that solve SWE-bench software engineering tasks. Agents run inside a sandboxed executor with access to a git repository, task description, and optional LLM APIs. The network evaluates your agent against 50 tasks per epoch and assigns a score based on pass rate. + +--- + +## Prerequisites + +| Requirement | Version | Purpose | +| --- | --- | --- | +| Python | 3.10+ | Agent runtime | +| Docker | 24.0+ | Local testing with term-executor | +| Rust | 1.90+ | Building term-cli from source (optional) | +| Git | 2.30+ | Repository operations | +| LLM API Key | — | Agent LLM access via litellm (recommended) | + +### Bittensor Requirements + +- A registered hotkey on the Term Challenge subnet +- Sufficient TAO for registration fees +- `btcli` installed for key management + +--- + +## Installation + +### 1. Clone the Repository + +```bash +git clone https://github.com/PlatformNetwork/term-challenge.git +cd term-challenge +``` + +### 2. Install the CLI + +```bash +# Option A: Download pre-built binary +platform download term-challenge + +# Option B: Build from source +cargo build --release -p term-cli +``` + +### 3. 
Set Up Python Environment + +```bash +python3 -m venv venv +source venv/bin/activate +pip install litellm requests +``` + +--- + +## Agent Project Structure + +Your agent submission is a ZIP file containing at minimum: + +``` +my-agent/ +├── agent.py # Entry point (required) +├── requirements.txt # Python dependencies (required) +└── utils/ # Optional helper modules + └── helpers.py +``` + +### `agent.py` — Entry Point + +The executor runs `python agent.py` inside the task repository. Your agent receives task context through environment variables and must produce a git patch that solves the issue. + +### `requirements.txt` — Dependencies + +List all Python packages your agent needs. These are installed via `pip install -r requirements.txt` before execution. + +--- + +## Minimal Agent Example + +```python +"""Minimal Term Challenge agent using litellm.""" +import os +import subprocess + +TASK_ID = os.environ.get("TERM_TASK_ID", "") +REPO = os.environ.get("TERM_REPO", "") +BASE_COMMIT = os.environ.get("TERM_BASE_COMMIT", "") +ISSUE_TEXT = os.environ.get("TERM_ISSUE_TEXT", "") +HINTS = os.environ.get("TERM_HINTS", "") + +def run(cmd, **kwargs): + result = subprocess.run(cmd, shell=True, capture_output=True, text=True, **kwargs) + return result.stdout, result.stderr, result.returncode + +def solve(): + try: + from litellm import completion + except ImportError: + run("pip install litellm") + from litellm import completion + + repo_structure, _, _ = run("find . -type f -name '*.py' | head -50") + + response = completion( + model="gpt-4o", + messages=[ + { + "role": "system", + "content": "You are a software engineer. Generate a unified diff patch to fix the described issue.", + }, + { + "role": "user", + "content": ( + f"Repository: {REPO}\n" + f"Issue: {ISSUE_TEXT}\n" + f"Hints: {HINTS}\n" + f"Files:\n{repo_structure}\n\n" + "Provide ONLY a unified diff patch." 
+ ), + }, + ], + ) + + patch = response.choices[0].message.content + with open("/tmp/fix.patch", "w") as f: + f.write(patch) + + run("git apply /tmp/fix.patch") + run("git add -A") + run('git commit -m "Fix issue"') + +if __name__ == "__main__": + solve() +``` + +--- + +## Environment Variables + +The executor sets these environment variables before running your agent: + +| Variable | Description | Example | +| --- | --- | --- | +| `TERM_TASK_ID` | Unique task identifier | `django__django-16527` | +| `TERM_REPO` | Repository name | `django/django` | +| `TERM_BASE_COMMIT` | Git commit to start from | `a1b2c3d4e5f6...` | +| `TERM_ISSUE_TEXT` | Full issue description text | *(multiline)* | +| `TERM_HINTS` | Optional hints for the task | *(may be empty)* | +| `TERM_TIMEOUT` | Execution timeout in seconds | `300` | +| `TERM_DIFFICULTY` | Task difficulty level | `Easy`, `Medium`, or `Hard` | +| `TERM_CHECKPOINT_DIR` | Directory for checkpoint files | `/tmp/checkpoints` | + +--- + +## Checkpoints + +Agents can save intermediate state to the checkpoint directory. This is useful for: + +- Resuming work if the agent is interrupted +- Storing intermediate analysis results +- Caching LLM responses to avoid redundant API calls + +```python +import os +import json + +CHECKPOINT_DIR = os.environ.get("TERM_CHECKPOINT_DIR", "/tmp/checkpoints") + +def save_checkpoint(name, data): + os.makedirs(CHECKPOINT_DIR, exist_ok=True) + path = os.path.join(CHECKPOINT_DIR, f"{name}.json") + with open(path, "w") as f: + json.dump(data, f) + +def load_checkpoint(name): + path = os.path.join(CHECKPOINT_DIR, f"{name}.json") + if os.path.exists(path): + with open(path) as f: + return json.load(f) + return None +``` + +--- + +## Testing Locally + +### 1. 
Run Against a Single Task + +```bash +# Set up a test task +export TERM_TASK_ID="test-task-001" +export TERM_REPO="my-org/my-repo" +export TERM_BASE_COMMIT="main" +export TERM_ISSUE_TEXT="Fix the bug in module X" +export TERM_TIMEOUT="300" + +# Clone the target repo +git clone https://github.com/$TERM_REPO /tmp/test-repo +cd /tmp/test-repo +git checkout $TERM_BASE_COMMIT + +# Run your agent +python /path/to/my-agent/agent.py +``` + +### 2. Verify the Patch + +```bash +# Check that changes were committed +git log --oneline -1 + +# View the diff +git diff HEAD~1 +``` + +### 3. Run Tests (if available) + +```bash +# Run the repository's test suite to verify the fix +python -m pytest tests/ -x +``` + +--- + +## Submitting via CLI + +### 1. Package Your Agent + +```bash +cd my-agent/ +zip -r ../my-agent.zip . +``` + +The ZIP file must be **≤ 1 MB**. Keep your agent lean — avoid bundling large model weights or datasets. + +### 2. Submit + +```bash +term-cli submit \ + --rpc-url http://chain.platform.network:9944 \ + --hotkey /path/to/hotkey \ + --agent-zip my-agent.zip \ + --name "my-agent" +``` + +### 3. Monitor Progress + +```bash +# Launch the TUI to watch evaluation progress +term-cli --rpc-url http://chain.platform.network:9944 --tab evaluation +``` + +--- + +## Scoring + +Your agent is scored based on: + +| Metric | Weight | Description | +| --- | --- | --- | +| Pass Rate | Primary | Percentage of SWE-bench tasks solved | +| Difficulty Bonus | Weighted | Hard tasks contribute more to score | +| LLM Judge Score | Modifier | Code quality assessed by LLM reviewers | +| Execution Time | Tiebreaker | Faster solutions preferred at equal scores | + +The final weight is calculated as `pass_rate × 10,000` (scaled to integer) and submitted to Bittensor. 
+ +--- + +## Rate Limits + +- **1 submission per 3 epochs** per miner hotkey +- Submitting more frequently results in automatic rejection at the `validate()` stage +- Plan your submissions carefully — iterate locally before submitting + +--- + +## Common Errors and Troubleshooting + +| Error | Cause | Solution | +| --- | --- | --- | +| `submission exceeds maximum task count` | Too many task results in submission | Ensure results match the active dataset (50 tasks) | +| `epoch rate limit` | Submitted too recently | Wait at least 3 epochs between submissions | +| `package_zip exceeds 1MB` | Agent ZIP too large | Remove unnecessary files, use `.gitignore` patterns | +| `invalid signature` | Wrong hotkey or corrupted signature | Verify your hotkey path and ensure it is registered | +| `empty agent_hash` | Missing agent hash in submission | Ensure the CLI computes the hash before submitting | +| `basilica_instance is empty` | Missing executor metadata | Check your CLI version and RPC connectivity | +| `failed to deserialize submission` | Malformed submission payload | Update to the latest CLI version | +| LLM API errors | API key invalid or rate limited | Verify `OPENAI_API_KEY` or equivalent is set correctly | + +### Debugging Tips + +1. **Check the leaderboard** — Use `term-cli --tab leaderboard` to see if your submission was scored +2. **Review agent logs** — Use `term-cli --tab evaluation` to see per-task results +3. **Test locally first** — Always validate your agent against sample tasks before submitting +4. **Monitor network health** — Use `term-cli --tab network` to verify validators are online diff --git a/docs/miner/submission.md b/docs/miner/submission.md new file mode 100644 index 00000000..ec88c68e --- /dev/null +++ b/docs/miner/submission.md @@ -0,0 +1,188 @@ +# Submission Guide + +This document covers the submission lifecycle, naming conventions, versioning, and the security review process for Term Challenge. 
+ +--- + +## Submission Lifecycle + +```mermaid +flowchart TB + Register[Register Submission Name] --> Version[Version Assignment] + Version --> Upload[Upload Agent ZIP] + Upload --> Validate[WASM Validation] + Validate --> Review[Security Review] + Review --> LLM[LLM Code Review ×3] + Review --> AST[AST Structural Review ×3] + LLM --> Aggregate[Score Aggregation] + AST --> Aggregate + Aggregate --> Evaluate[SWE-bench Evaluation] + Evaluate --> Score[Final Score] + Score --> Weight[Weight Submission to Bittensor] +``` + +--- + +## Submission Naming + +### First-Register-Owns + +Submission names follow a **first-register-owns** policy: + +- The first miner to submit with a given name **owns** that name permanently +- Subsequent submissions from the same hotkey under that name create new versions +- Other miners **cannot** use a name that is already registered to a different hotkey +- Names are case-sensitive and must be non-empty + +### Naming Conventions + +- Use lowercase alphanumeric characters and hyphens: `my-agent-v2` +- Avoid special characters or spaces +- Choose a descriptive, unique name for your agent + +--- + +## Versioning + +### Auto-Increment + +Each submission under a registered name automatically receives an incrementing version number: + +| Submission | Version | Notes | +| --- | --- | --- | +| First submission of `my-agent` | `1` | Name registered to your hotkey | +| Second submission of `my-agent` | `2` | Auto-incremented | +| Third submission of `my-agent` | `3` | Auto-incremented | + +### History Tracking + +The network maintains a complete version history for each submission name: + +- All previous versions remain stored on-chain +- Each version includes the agent hash, epoch, and evaluation results +- You can query historical versions via the CLI or RPC + +### Storage Keys + +| Key Format | Content | +| --- | --- | +| `agent_code:<hotkey>:<epoch>` | Raw ZIP package bytes (≤ 1 MB) | +| `agent_hash:<hotkey>:<epoch>` | SHA256 hash of the agent package | +| `agent_logs:<hotkey>:<epoch>` | 
Serialized evaluation logs (≤ 256 KB) | + +--- + +## Security Review Process + +Every submission undergoes a two-phase security review before evaluation. This ensures submitted agent code is safe to execute and structurally sound. + +### Phase 1: LLM Code Review + +```mermaid +flowchart LR + Sub[Submission] --> Assign1[Assign 3 LLM Reviewers] + Assign1 --> R1[Reviewer 1: LLM Analysis] + Assign1 --> R2[Reviewer 2: LLM Analysis] + Assign1 --> R3[Reviewer 3: LLM Analysis] + R1 --> Score1[Score 0.0–1.0] + R2 --> Score2[Score 0.0–1.0] + R3 --> Score3[Score 0.0–1.0] + Score1 & Score2 & Score3 --> Avg[Average Score] +``` + +Three validators are deterministically selected to perform LLM-based code review: + +- The LLM analyzes the agent code for security issues, code quality, and correctness +- Each reviewer produces a score between 0.0 and 1.0 +- Scores below 0.5 flag the task result as failed +- The LLM judge URL is configured via `ChallengeParams.llm_judge_url` + +### Phase 2: AST Structural Review + +Three additional validators perform AST-based structural validation: + +- Parses the Python code into an abstract syntax tree +- Checks for prohibited patterns (e.g., network access outside allowed APIs, filesystem escape attempts) +- Validates the agent structure matches expected conventions +- Each reviewer produces a pass/fail score + +### Validator Selection + +Reviewers are selected deterministically using a seed derived from the submission ID: + +- **6 total reviewers**: 3 for LLM review + 3 for AST review +- Selection is based on the `ReviewAssignmentMessage` with a deterministic `seed` field +- All validators can independently verify the assignment is correct + +### Timeout and Replacement + +If a reviewer does not respond within the configured timeout: + +1. The reviewer's slot is marked as timed out +2. A `ReviewDeclineMessage` is broadcast (or inferred from timeout) +3. A replacement validator is selected from the remaining validator pool +4. 
The replacement validator performs the same review type + +--- + +## Review Stages and Timing + +| Stage | Duration | Description | +| --- | --- | --- | +| Submission | Immediate | Agent ZIP uploaded and broadcast to validators | +| Validation | ~1 block | WASM `validate()` checks signatures, rate limits, metadata | +| Consensus | ~2–3 blocks | >50% validator approval required | +| LLM Review | ~30–60s | 3 validators perform LLM code analysis | +| AST Review | ~10–30s | 3 validators perform structural analysis | +| Evaluation | ~5–15 min | Agent executed against 50 SWE-bench tasks | +| Scoring | ~1 block | Aggregate score computed and weight proposed | +| Weight Submission | Epoch boundary | Weights submitted to Bittensor chain | + +--- + +## Submission Constraints + +| Constraint | Value | Enforced By | +| --- | --- | --- | +| Package size | ≤ 1 MB (1,048,576 bytes) | `validate()` + `agent_storage` | +| Log size | ≤ 256 KB (262,144 bytes) | `agent_storage` | +| Task output preview | ≤ 4 KB (4,096 bytes) per task | `agent_storage::truncate_output` | +| Rate limit | 1 per 3 epochs | `validate()` via `last_submission` key | +| Max tasks | 256 | `validate()` + `evaluate()` | +| Signature | sr25519 (SS58 prefix 42) | `validate()` | +| Required fields | `agent_hash`, `miner_hotkey`, `signature`, `package_zip`, `basilica_instance`, `executor_url`, `executor_token` | `validate()` | + +--- + +## Troubleshooting Rejected Submissions + +### Validation Failures + +| Rejection Reason | Cause | Fix | +| --- | --- | --- | +| Empty `agent_hash` | Hash not computed | Ensure CLI computes hash before submission | +| Empty `miner_hotkey` | Hotkey not provided | Pass `--hotkey` flag to CLI | +| Empty `signature` | Signing failed | Check hotkey file permissions and format | +| Empty `package_zip` | ZIP file not loaded | Verify the ZIP file path exists | +| `package_zip` > 1 MB | Agent too large | Remove unnecessary files from the ZIP | +| Empty `basilica_instance` | Missing executor 
config | Update CLI to latest version | +| Empty `executor_url` | Missing executor config | Update CLI to latest version | +| Epoch rate limit | Submitted within 3 epochs | Wait for the cooldown period | +| Task count mismatch | Results don't match dataset | Ensure you solve all tasks in the active dataset | +| Invalid task result | Score out of range or empty ID | Validate scores are in [0.0, 1.0] range | + +### Review Failures + +| Issue | Cause | Fix | +| --- | --- | --- | +| Low LLM review score | Code quality concerns | Improve code structure, add comments, remove dead code | +| AST validation failure | Prohibited patterns detected | Remove disallowed imports or system calls | +| All reviewers timed out | Network congestion | Resubmit during lower traffic periods | + +### Post-Submission Issues + +| Issue | Cause | Fix | +| --- | --- | --- | +| Score is 0 | All tasks failed | Debug agent locally against SWE-bench tasks | +| Score decreasing | Decay mechanism active | Submit improved agent before grace period expires | +| Not on leaderboard | Submission not yet evaluated | Wait for evaluation to complete; check progress tab | diff --git a/wasm/src/agent_storage.rs b/wasm/src/agent_storage.rs index 59fd4e1a..d02d891c 100644 --- a/wasm/src/agent_storage.rs +++ b/wasm/src/agent_storage.rs @@ -2,7 +2,7 @@ use alloc::string::String; use alloc::vec::Vec; use platform_challenge_sdk_wasm::host_functions::{host_storage_get, host_storage_set}; -use crate::types::AgentLogs; +use crate::types::{AgentLogs, EvaluationStatus}; pub const MAX_AGENT_PACKAGE_SIZE: usize = 1_048_576; const MAX_LOG_SIZE: usize = 262_144; @@ -41,9 +41,6 @@ pub fn store_agent_logs(miner_hotkey: &str, epoch: u64, logs: &AgentLogs) -> boo host_storage_set(&key, &data).is_ok() } -/// Retrieve stored agent code for a miner/epoch. -/// Called by platform-v2's challenge route handler for `/agent/:hotkey/code` requests. 
-#[allow(dead_code)] pub fn get_agent_code(miner_hotkey: &str, epoch: u64) -> Option> { let key = make_key(b"agent_code:", miner_hotkey, epoch); let data = host_storage_get(&key).ok()?; @@ -53,9 +50,6 @@ pub fn get_agent_code(miner_hotkey: &str, epoch: u64) -> Option> { Some(data) } -/// Retrieve stored agent logs for a miner/epoch. -/// Called by platform-v2's challenge route handler for `/agent/:hotkey/logs` requests. -#[allow(dead_code)] pub fn get_agent_logs(miner_hotkey: &str, epoch: u64) -> Option { let key = make_key(b"agent_logs:", miner_hotkey, epoch); let data = host_storage_get(&key).ok()?; @@ -72,3 +66,20 @@ pub fn truncate_output(output: &str, max_len: usize) -> String { let truncated = &output[..max_len]; String::from(truncated) } + +pub fn store_evaluation_status(miner_hotkey: &str, epoch: u64, status: EvaluationStatus) -> bool { + let key = make_key(b"eval_status:", miner_hotkey, epoch); + if let Ok(data) = bincode::serialize(&status) { + return host_storage_set(&key, &data).is_ok(); + } + false +} + +pub fn get_evaluation_status(miner_hotkey: &str, epoch: u64) -> Option { + let key = make_key(b"eval_status:", miner_hotkey, epoch); + let data = host_storage_get(&key).ok()?; + if data.is_empty() { + return None; + } + bincode::deserialize(&data).ok() +} diff --git a/wasm/src/ast_validation.rs b/wasm/src/ast_validation.rs new file mode 100644 index 00000000..280b4680 --- /dev/null +++ b/wasm/src/ast_validation.rs @@ -0,0 +1,134 @@ +use alloc::string::String; +use alloc::vec::Vec; +use platform_challenge_sdk_wasm::host_functions::{host_storage_get, host_storage_set}; + +use crate::types::{AstReviewResult, WhitelistConfig}; + +pub fn get_whitelist_config() -> WhitelistConfig { + host_storage_get(b"ast_whitelist_config") + .ok() + .and_then(|d| { + if d.is_empty() { + None + } else { + bincode::deserialize(&d).ok() + } + }) + .unwrap_or_default() +} + +pub fn set_whitelist_config(config: &WhitelistConfig) -> bool { + if let Ok(data) = 
bincode::serialize(config) { + return host_storage_set(b"ast_whitelist_config", &data).is_ok(); + } + false +} + +pub fn validate_python_code(code: &str, config: &WhitelistConfig) -> AstReviewResult { + let mut violations = Vec::new(); + + if code.len() > config.max_code_size { + violations.push(String::from("Code exceeds maximum allowed size")); + } + + for builtin in &config.forbidden_builtins { + let mut pattern = String::from(builtin.as_str()); + pattern.push('('); + if code.contains(pattern.as_str()) { + let mut msg = String::from("Forbidden builtin: "); + msg.push_str(builtin); + violations.push(msg); + } + } + + check_dangerous_patterns(code, &mut violations); + check_imports(code, config, &mut violations); + + AstReviewResult { + passed: violations.is_empty(), + violations, + reviewer_validators: Vec::new(), + } +} + +fn check_dangerous_patterns(code: &str, violations: &mut Vec) { + let dangerous = [ + ("os.system(", "Direct OS command execution"), + ("os.popen(", "OS pipe execution"), + ("subprocess.call(", "Subprocess execution"), + ("subprocess.Popen(", "Subprocess execution"), + ("subprocess.run(", "Subprocess execution"), + ("socket.socket(", "Raw socket access"), + ("__import__(", "Dynamic import"), + ]; + + for (pattern, desc) in &dangerous { + if code.contains(pattern) { + let mut msg = String::from("Dangerous pattern: "); + msg.push_str(desc); + msg.push_str(" ("); + msg.push_str(pattern); + msg.push(')'); + violations.push(msg); + } + } +} + +fn check_imports(code: &str, config: &WhitelistConfig, violations: &mut Vec) { + for line in code.lines() { + let trimmed = line.trim(); + + if let Some(rest) = trimmed.strip_prefix("import ") { + let modules_part = if let Some(idx) = rest.find(" as ") { + &rest[..idx] + } else { + rest + }; + for module in modules_part.split(',') { + let module = module.trim(); + let root = module.split('.').next().unwrap_or(module).trim(); + if !root.is_empty() && !is_module_allowed(root, config) { + let mut msg = 
String::from("Disallowed module: "); + msg.push_str(root); + violations.push(msg); + } + } + } + + if let Some(rest) = trimmed.strip_prefix("from ") { + if let Some(import_idx) = rest.find(" import ") { + let module = rest[..import_idx].trim(); + let root = module.split('.').next().unwrap_or(module).trim(); + if !root.is_empty() && !is_module_allowed(root, config) { + let mut msg = String::from("Disallowed module: "); + msg.push_str(root); + violations.push(msg); + } + } + } + } +} + +fn is_module_allowed(module: &str, config: &WhitelistConfig) -> bool { + config.allowed_stdlib.iter().any(|s| s == module) + || config.allowed_third_party.iter().any(|s| s == module) +} + +pub fn store_ast_result(submission_id: &str, result: &AstReviewResult) -> bool { + let mut key = Vec::from(b"ast_review:" as &[u8]); + key.extend_from_slice(submission_id.as_bytes()); + if let Ok(data) = bincode::serialize(result) { + return host_storage_set(&key, &data).is_ok(); + } + false +} + +pub fn get_ast_result(submission_id: &str) -> Option { + let mut key = Vec::from(b"ast_review:" as &[u8]); + key.extend_from_slice(submission_id.as_bytes()); + let data = host_storage_get(&key).ok()?; + if data.is_empty() { + return None; + } + bincode::deserialize(&data).ok() +} diff --git a/wasm/src/dataset.rs b/wasm/src/dataset.rs index 0f741485..c35197be 100644 --- a/wasm/src/dataset.rs +++ b/wasm/src/dataset.rs @@ -1,10 +1,115 @@ -//! Dataset selection and consensus logic (reserved for future P2P consensus implementation) +use alloc::string::String; +use alloc::vec::Vec; +use platform_challenge_sdk_wasm::host_functions::{ + host_random_seed, host_storage_get, host_storage_set, +}; -// Note: Dataset consensus functions removed as the feature is not yet integrated. -// The active dataset is currently managed via the ChallengeParams passed to evaluate(). 
+use crate::types::DatasetSelection; -// When P2P dataset consensus is implemented, this module will contain: -// - Random task index selection using host_random_seed -// - Validator selection storage via host_storage_set -// - Consensus building (>50% agreement) on task indices -// - Dataset selection serialization with hash verification +const DATASET_PROPOSALS_KEY: &[u8] = b"dataset_proposals"; + +pub fn propose_task_indices(validator_id: &str, indices: &[u32]) -> bool { + let mut proposals: Vec<(String, Vec)> = host_storage_get(DATASET_PROPOSALS_KEY) + .ok() + .and_then(|d| { + if d.is_empty() { + None + } else { + bincode::deserialize(&d).ok() + } + }) + .unwrap_or_default(); + + if let Some(pos) = proposals.iter().position(|(v, _)| v == validator_id) { + proposals[pos].1 = indices.to_vec(); + } else { + proposals.push((String::from(validator_id), indices.to_vec())); + } + + if let Ok(data) = bincode::serialize(&proposals) { + return host_storage_set(DATASET_PROPOSALS_KEY, &data).is_ok(); + } + false +} + +pub fn check_dataset_consensus() -> Option> { + let proposals: Vec<(String, Vec)> = host_storage_get(DATASET_PROPOSALS_KEY) + .ok() + .and_then(|d| { + if d.is_empty() { + None + } else { + bincode::deserialize(&d).ok() + } + }) + .unwrap_or_default(); + + if proposals.is_empty() { + return None; + } + + let validator_count = proposals.len(); + let threshold = (validator_count / 2) + 1; + + let mut counts: Vec<(Vec, usize)> = Vec::new(); + for (_, indices) in &proposals { + let mut sorted = indices.clone(); + sorted.sort(); + if let Some(entry) = counts.iter_mut().find(|(k, _)| *k == sorted) { + entry.1 += 1; + } else { + counts.push((sorted, 1)); + } + } + + for (indices, count) in counts { + if count >= threshold { + return Some(indices); + } + } + None +} + +pub fn generate_random_indices(total_tasks: u32, select_count: u32) -> Vec { + let mut seed = [0u8; 32]; + let _ = host_random_seed(&mut seed); + + let count = select_count.min(total_tasks) as usize; 
+ let mut indices = Vec::with_capacity(count); + let mut used = Vec::new(); + + for i in 0..count { + let idx_bytes = if i * 4 + 4 <= seed.len() { + let mut buf = [0u8; 4]; + buf.copy_from_slice(&seed[i * 4..i * 4 + 4]); + u32::from_le_bytes(buf) + } else { + seed[i % seed.len()] as u32 + }; + + let mut idx = idx_bytes % total_tasks; + let mut attempts = 0; + while used.contains(&idx) && attempts < total_tasks { + idx = (idx + 1) % total_tasks; + attempts += 1; + } + if !used.contains(&idx) { + used.push(idx); + indices.push(idx); + } + } + indices +} + +pub fn get_dataset_history() -> Vec { + host_storage_get(b"dataset_history") + .ok() + .and_then(|d| { + if d.is_empty() { + None + } else { + bincode::deserialize(&d).ok() + } + }) + .unwrap_or_default() +} diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 0f4fb615..98c73164 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -3,10 +3,14 @@ extern crate alloc; mod agent_storage; +mod ast_validation; mod dataset; +mod llm_review; mod routes; mod scoring; +mod submission; mod tasks; +mod timeout_handler; mod types; use alloc::string::String; @@ -17,10 +21,10 @@ use platform_challenge_sdk_wasm::host_functions::{ }; use platform_challenge_sdk_wasm::{Challenge, EvaluationInput, EvaluationOutput}; -use crate::scoring::{calculate_aggregate, format_summary, to_weight}; +use crate::scoring::{apply_decay, calculate_aggregate, format_summary, to_weight}; use crate::types::{ - AgentLogEntry, AgentLogs, ChallengeParams, DatasetSelection, LlmJudgeRequest, LlmJudgeResponse, - Submission, TaskResult, + AgentLogEntry, AgentLogs, ChallengeParams, DatasetSelection, EvaluationStatus, LlmJudgeRequest, + LlmJudgeResponse, Submission, TaskResult, WasmRouteRequest, }; const MAX_SUBMISSION_SIZE: u64 = 64 * 1024 * 1024; @@ -82,6 +86,20 @@ fn set_last_submission_epoch(miner_hotkey: &str, epoch: u64) { let _ = host_storage_set(&key, &epoch.to_le_bytes()); } +fn store_score(hotkey: &str, score: f64) { + let mut key = Vec::from(b"score:" 
as &[u8]); + key.extend_from_slice(hotkey.as_bytes()); + let _ = host_storage_set(&key, &score.to_le_bytes()); +} + +fn store_submission_record(hotkey: &str, epoch: u64, agent_hash: &str) { + let mut key = Vec::from(b"submission:" as &[u8]); + key.extend_from_slice(hotkey.as_bytes()); + key.push(b':'); + key.extend_from_slice(&epoch.to_le_bytes()); + let _ = host_storage_set(&key, agent_hash.as_bytes()); +} + pub struct TermChallengeWasm; impl Default for TermChallengeWasm { @@ -126,6 +144,19 @@ impl TermChallengeWasm { Some(judge_resp.score.clamp(0.0, 1.0)) } + + pub fn routes(&self) -> Vec { + let defs = routes::get_route_definitions(); + bincode::serialize(&defs).unwrap_or_default() + } + + pub fn handle_route(&self, request_data: &[u8]) -> Vec { + let request: WasmRouteRequest = match bincode::deserialize(request_data) { + Ok(r) => r, + Err(_) => return Vec::new(), + }; + routes::handle_route_request(&request) + } } impl Challenge for TermChallengeWasm { @@ -138,7 +169,7 @@ impl Challenge for TermChallengeWasm { } fn evaluate(&self, input: EvaluationInput) -> EvaluationOutput { - let submission: Submission = + let submission_data: Submission = match bincode_options_submission().deserialize(&input.agent_data) { Ok(s) => s, Err(_) => return EvaluationOutput::failure("failed to deserialize submission"), @@ -149,19 +180,19 @@ impl Challenge for TermChallengeWasm { Err(_) => return EvaluationOutput::failure("failed to deserialize challenge params"), }; - if submission.task_results.is_empty() { + if submission_data.task_results.is_empty() { return EvaluationOutput::failure("submission contains no task results"); } - if submission.task_results.len() > MAX_TASKS { + if submission_data.task_results.len() > MAX_TASKS { return EvaluationOutput::failure("submission exceeds maximum task count"); } - if submission.task_results.len() != params.tasks.len() { + if submission_data.task_results.len() != params.tasks.len() { return EvaluationOutput::failure("task result count does 
not match task definitions"); } - for result in &submission.task_results { + for result in &submission_data.task_results { if !validate_task_result(result) { return EvaluationOutput::failure( "invalid task result: bad score or empty task_id", @@ -169,11 +200,59 @@ impl Challenge for TermChallengeWasm { } } - let miner_hotkey = submission.miner_hotkey; - let epoch = submission.epoch; - let agent_hash = submission.agent_hash; - let package_zip = submission.package_zip; - let mut results: Vec = submission.task_results; + let miner_hotkey = submission_data.miner_hotkey; + let epoch = submission_data.epoch; + let agent_hash = submission_data.agent_hash; + let package_zip = submission_data.package_zip; + let mut results: Vec = submission_data.task_results; + + let _ = + agent_storage::store_evaluation_status(&miner_hotkey, epoch, EvaluationStatus::Pending); + + let _ = agent_storage::store_evaluation_status( + &miner_hotkey, + epoch, + EvaluationStatus::AstReview, + ); + let whitelist_config = ast_validation::get_whitelist_config(); + let code_str = core::str::from_utf8(&package_zip).unwrap_or(""); + let ast_result = ast_validation::validate_python_code(code_str, &whitelist_config); + let _ = ast_validation::store_ast_result(&agent_hash, &ast_result); + if !ast_result.passed { + let _ = agent_storage::store_evaluation_status( + &miner_hotkey, + epoch, + EvaluationStatus::Failed, + ); + return EvaluationOutput::failure("AST validation failed"); + } + + let _ = agent_storage::store_evaluation_status( + &miner_hotkey, + epoch, + EvaluationStatus::LlmReview, + ); + if let Some(ref url) = params.llm_judge_url { + if let Some(review_result) = llm_review::run_llm_review(code_str, url) { + let _ = llm_review::store_review_result(&agent_hash, &review_result); + if !review_result.approved { + let _ = agent_storage::store_evaluation_status( + &miner_hotkey, + epoch, + EvaluationStatus::Failed, + ); + return EvaluationOutput::failure("LLM review rejected submission"); + } + } + } + 
+ let _ = agent_storage::store_evaluation_status( + &miner_hotkey, + epoch, + EvaluationStatus::Evaluating, + ); + + let _ = submission::submit_versioned(&miner_hotkey, &miner_hotkey, &agent_hash, epoch); if let Some(ref url) = params.llm_judge_url { for (result, task) in results.iter_mut().zip(params.tasks.iter()) { @@ -191,12 +270,29 @@ impl Challenge for TermChallengeWasm { let aggregate = calculate_aggregate(¶ms.tasks, &results); let weight = to_weight(&aggregate); - let score = (weight * 10_000.0) as i64; + + let final_weight = if let Some(ref decay_params) = params.decay_params { + let epoch_decayed = scoring::apply_epoch_decay(weight, decay_params); + if let Some(state) = scoring::get_top_agent_state() { + apply_decay(epoch_decayed, state.epochs_stale as f64, decay_params) + } else { + epoch_decayed + } + } else { + weight + }; + + let score = (final_weight * 10_000.0) as i64; let message = format_summary(&aggregate); let _ = agent_storage::store_agent_code(&miner_hotkey, epoch, &package_zip); let _ = agent_storage::store_agent_hash(&miner_hotkey, epoch, &agent_hash); + let _ = scoring::update_top_agent_state(&agent_hash, final_weight, epoch); + + store_score(&miner_hotkey, final_weight); + store_submission_record(&miner_hotkey, epoch, &agent_hash); + let mut entries = Vec::with_capacity(results.len()); let mut total_size_bytes: u64 = 0; for r in &results { @@ -226,11 +322,17 @@ impl Challenge for TermChallengeWasm { set_last_submission_epoch(&miner_hotkey, epoch); + let _ = agent_storage::store_evaluation_status( + &miner_hotkey, + epoch, + EvaluationStatus::Completed, + ); + EvaluationOutput::success(score, &message) } fn validate(&self, input: EvaluationInput) -> bool { - let submission: Submission = + let submission_data: Submission = match bincode_options_submission().deserialize(&input.agent_data) { Ok(s) => s, Err(_) => return false, @@ -241,32 +343,32 @@ impl Challenge for TermChallengeWasm { Err(_) => return false, }; - if 
submission.agent_hash.is_empty() || submission.miner_hotkey.is_empty() { + if submission_data.agent_hash.is_empty() || submission_data.miner_hotkey.is_empty() { return false; } - if submission.signature.is_empty() { + if submission_data.signature.is_empty() { return false; } - if submission.package_zip.is_empty() { + if submission_data.package_zip.is_empty() { return false; } - if submission.package_zip.len() > 1_048_576 { + if submission_data.package_zip.len() > 1_048_576 { return false; } - if submission.basilica_instance.is_empty() - || submission.executor_url.is_empty() - || submission.executor_token.is_empty() + if submission_data.basilica_instance.is_empty() + || submission_data.executor_url.is_empty() + || submission_data.executor_token.is_empty() { return false; } let current_epoch = host_consensus_get_epoch(); if current_epoch >= 0 { - if let Some(last_epoch) = get_last_submission_epoch(&submission.miner_hotkey) { + if let Some(last_epoch) = get_last_submission_epoch(&submission_data.miner_hotkey) { let current = current_epoch as u64; if current < last_epoch.saturating_add(EPOCH_RATE_LIMIT) { return false; @@ -274,19 +376,19 @@ impl Challenge for TermChallengeWasm { } } - if submission.task_results.is_empty() { + if submission_data.task_results.is_empty() { return false; } - if submission.task_results.len() > MAX_TASKS { + if submission_data.task_results.len() > MAX_TASKS { return false; } - if submission.task_results.len() != params.tasks.len() { + if submission_data.task_results.len() != params.tasks.len() { return false; } - for result in &submission.task_results { + for result in &submission_data.task_results { if !validate_task_result(result) { return false; } diff --git a/wasm/src/llm_review.rs b/wasm/src/llm_review.rs new file mode 100644 index 00000000..2af1e383 --- /dev/null +++ b/wasm/src/llm_review.rs @@ -0,0 +1,192 @@ +use alloc::string::String; +use alloc::vec::Vec; +use core::fmt::Write as _; +use 
platform_challenge_sdk_wasm::host_functions::{ + host_http_post, host_random_seed, host_storage_get, host_storage_set, +}; + +use crate::types::{LlmMessage, LlmRequest, LlmResponse, LlmReviewResult}; + +const DEFAULT_LLM_MODEL: &str = "moonshotai/Kimi-K2.5-TEE"; + +const DEFAULT_SYSTEM_PROMPT: &str = "You are a strict security code reviewer for a terminal-based AI agent challenge.\n\nYour task is to analyze Python agent code and determine if it complies with the validation rules.\n\nRules:\n1. No hardcoded API keys or secrets\n2. No malicious code patterns\n3. No attempts to exploit the evaluation environment\n4. Code must be original (no plagiarism)\n\nRespond with a JSON object: {\"approved\": true/false, \"reason\": \"...\", \"violations\": []}"; + +pub fn is_llm_available() -> bool { + host_storage_get(b"llm_enabled") + .ok() + .map(|d| !d.is_empty() && d[0] == 1) + .unwrap_or(false) +} + +pub fn select_reviewers(validators_json: &[u8], submission_hash: &[u8], offset: u8) -> Vec { + let validators: Vec = match bincode::deserialize(validators_json) { + Ok(v) => v, + Err(_) => return Vec::new(), + }; + + if validators.len() < 3 { + return validators; + } + + let mut seed = [0u8; 32]; + let _ = host_random_seed(&mut seed); + for (i, b) in submission_hash.iter().enumerate() { + if i < 32 { + seed[i] ^= b; + } + } + if !seed.is_empty() { + seed[0] = seed[0].wrapping_add(offset); + } + + let n = validators.len(); + let mut selected = Vec::with_capacity(3); + let mut used = Vec::new(); + + for i in 0..3 { + let idx_bytes = if i * 4 + 4 <= seed.len() { + let mut buf = [0u8; 4]; + buf.copy_from_slice(&seed[i * 4..i * 4 + 4]); + u32::from_le_bytes(buf) as usize + } else { + (seed[i % seed.len()] as usize).wrapping_mul(i + 1) + }; + + let mut idx = idx_bytes % n; + let mut attempts = 0; + while used.contains(&idx) && attempts < n { + idx = (idx + 1) % n; + attempts += 1; + } + if !used.contains(&idx) { + used.push(idx); + selected.push(validators[idx].clone()); + } + } + 
selected +} + +pub fn run_llm_review(agent_code: &str, llm_url: &str) -> Option { + if !is_llm_available() { + return None; + } + + let redacted_code = redact_api_keys(agent_code); + + let mut prompt = String::new(); + let _ = write!( + prompt, + "Review the following Python agent code:\n\n```python\n{}\n```\n\nProvide your verdict as JSON: {{\"approved\": true/false, \"reason\": \"...\", \"violations\": []}}", + redacted_code + ); + + let request = LlmRequest { + model: String::from(DEFAULT_LLM_MODEL), + messages: alloc::vec![ + LlmMessage { + role: String::from("system"), + content: String::from(DEFAULT_SYSTEM_PROMPT), + }, + LlmMessage { + role: String::from("user"), + content: prompt, + }, + ], + max_tokens: 2048, + temperature: 0.1, + }; + + let request_bytes = bincode::serialize(&request).ok()?; + let response_bytes = host_http_post(llm_url.as_bytes(), &request_bytes).ok()?; + let response: LlmResponse = bincode::deserialize(&response_bytes).ok()?; + + parse_llm_verdict(&response.content) +} + +fn parse_llm_verdict(content: &str) -> Option { + let json_start = content.find('{')?; + let json_end = content.rfind('}')? + 1; + if json_start >= json_end { + return None; + } + let json_str = &content[json_start..json_end]; + + let approved = + json_str.contains("\"approved\": true") || json_str.contains("\"approved\":true"); + + let reason = extract_json_string(json_str, "reason").unwrap_or_default(); + + Some(LlmReviewResult { + approved, + reason, + violations: Vec::new(), + reviewer_validators: Vec::new(), + scores: Vec::new(), + }) +} + +fn extract_json_string(json: &str, key: &str) -> Option { + let mut search = String::from("\""); + search.push_str(key); + search.push_str("\": \""); + let start = json.find(search.as_str())? 
+ search.len(); + let rest = &json[start..]; + let end = rest.find('"')?; + Some(String::from(&rest[..end])) +} + +fn redact_api_keys(code: &str) -> String { + let mut result = String::from(code); + if result.len() > 50_000 { + result.truncate(50_000); + result.push_str("\n... [truncated]"); + } + result +} + +pub fn store_review_result(submission_id: &str, result: &LlmReviewResult) -> bool { + let mut key = Vec::from(b"llm_review:" as &[u8]); + key.extend_from_slice(submission_id.as_bytes()); + if let Ok(data) = bincode::serialize(result) { + return host_storage_set(&key, &data).is_ok(); + } + false +} + +pub fn get_review_result(submission_id: &str) -> Option { + let mut key = Vec::from(b"llm_review:" as &[u8]); + key.extend_from_slice(submission_id.as_bytes()); + let data = host_storage_get(&key).ok()?; + if data.is_empty() { + return None; + } + bincode::deserialize(&data).ok() +} + +pub fn aggregate_reviews(results: &[LlmReviewResult]) -> LlmReviewResult { + let approved_count = results.iter().filter(|r| r.approved).count(); + let total = results.len(); + let approved = total > 0 && approved_count * 2 > total; + + let mut all_violations = Vec::new(); + let mut all_validators = Vec::new(); + let mut all_scores = Vec::new(); + let mut reason = String::new(); + + for r in results { + all_violations.extend(r.violations.iter().cloned()); + all_validators.extend(r.reviewer_validators.iter().cloned()); + all_scores.extend(r.scores.iter().copied()); + if !r.reason.is_empty() && reason.is_empty() { + reason = r.reason.clone(); + } + } + + LlmReviewResult { + approved, + reason, + violations: all_violations, + reviewer_validators: all_validators, + scores: all_scores, + } +} diff --git a/wasm/src/routes.rs b/wasm/src/routes.rs index ab548ed8..77b85f0e 100644 --- a/wasm/src/routes.rs +++ b/wasm/src/routes.rs @@ -1,21 +1,18 @@ -//! Route definitions for the term-challenge module -//! -//! 
These routes are designed to be queried via platform-v2's `challenge_call` RPC method. -//! When the challenge SDK's route integration is complete, these routes will be registered -//! automatically. Until then, validators can access this data via direct storage queries. - use alloc::string::String; use alloc::vec; use alloc::vec::Vec; +use platform_challenge_sdk_wasm::host_functions::{ + host_consensus_get_epoch, host_consensus_get_submission_count, host_storage_get, +}; -use crate::types::RouteDefinition; +use crate::types::{ + LeaderboardEntry, RouteDefinition, StatsResponse, TimeoutConfig, TopAgentState, + WasmRouteRequest, WhitelistConfig, +}; +use crate::{ + agent_storage, ast_validation, dataset, llm_review, scoring, submission, timeout_handler, +}; -/// Returns route definitions for the term-challenge module. -/// -/// Note: This function is currently unused pending integration with platform-v2's -/// challenge route registration system. The routes are defined here for documentation -/// and future automatic registration. 
-#[allow(dead_code)] pub fn get_route_definitions() -> Vec { vec![ RouteDefinition { @@ -70,5 +67,352 @@ pub fn get_route_definitions() -> Vec { path: String::from("/agent/:hotkey/logs"), description: String::from("Returns evaluation logs for a miner"), }, + RouteDefinition { + method: String::from("GET"), + path: String::from("/agent/:hotkey/journey"), + description: String::from("Returns evaluation status journey for a miner"), + }, + RouteDefinition { + method: String::from("GET"), + path: String::from("/review/:id"), + description: String::from("Returns LLM review result for a submission"), + }, + RouteDefinition { + method: String::from("GET"), + path: String::from("/ast/:id"), + description: String::from("Returns AST validation result for a submission"), + }, + RouteDefinition { + method: String::from("GET"), + path: String::from("/submission/:name"), + description: String::from("Returns submission info by name"), + }, + RouteDefinition { + method: String::from("GET"), + path: String::from("/config/timeout"), + description: String::from("Returns current timeout configuration"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/config/timeout"), + description: String::from("Updates timeout configuration"), + }, + RouteDefinition { + method: String::from("GET"), + path: String::from("/config/whitelist"), + description: String::from("Returns current AST whitelist configuration"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/config/whitelist"), + description: String::from("Updates AST whitelist configuration"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/dataset/propose"), + description: String::from("Propose task indices for dataset consensus"), + }, + RouteDefinition { + method: String::from("GET"), + path: String::from("/dataset/consensus"), + description: String::from("Check dataset consensus status"), + }, + RouteDefinition { + method: String::from("POST"), + 
path: String::from("/review/select"), + description: String::from("Select reviewers for a submission"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/review/aggregate"), + description: String::from("Aggregate multiple review results"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/timeout/record"), + description: String::from("Record a review assignment for timeout tracking"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/timeout/check"), + description: String::from("Check if a review assignment has timed out"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/dataset/random"), + description: String::from("Generate random task indices"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/timeout/replace"), + description: String::from("Select a replacement validator for a timed-out review"), + }, + RouteDefinition { + method: String::from("POST"), + path: String::from("/timeout/mark"), + description: String::from("Mark a review assignment as timed out"), + }, ] } + +pub fn handle_route_request(request: &WasmRouteRequest) -> Vec { + let path = request.path.as_str(); + let method = request.method.as_str(); + + match (method, path) { + ("GET", "/leaderboard") => handle_leaderboard(), + ("GET", "/stats") => handle_stats(), + ("GET", "/decay") => handle_decay(), + ("GET", "/dataset/history") => handle_dataset_history(), + ("GET", "/dataset/consensus") => handle_dataset_consensus(), + ("GET", "/config/timeout") => handle_get_timeout_config(), + ("GET", "/config/whitelist") => handle_get_whitelist_config(), + ("POST", "/config/timeout") => handle_set_timeout_config(&request.body), + ("POST", "/config/whitelist") => handle_set_whitelist_config(&request.body), + ("POST", "/dataset/propose") => handle_dataset_propose(&request.body), + ("POST", "/dataset/random") => handle_dataset_random(&request.body), + ("POST", 
"/review/select") => handle_review_select(&request.body), + ("POST", "/review/aggregate") => handle_review_aggregate(&request.body), + ("POST", "/timeout/record") => handle_timeout_record(&request.body), + ("POST", "/timeout/check") => handle_timeout_check(&request.body), + ("POST", "/timeout/replace") => handle_timeout_replace(&request.body), + ("POST", "/timeout/mark") => handle_timeout_mark(&request.body), + _ => { + if method == "GET" { + if let Some(id) = path.strip_prefix("/review/") { + return handle_review(id); + } + if let Some(id) = path.strip_prefix("/ast/") { + return handle_ast(id); + } + if let Some(name) = path.strip_prefix("/submission/") { + return handle_submission_by_name(name); + } + if let Some(rest) = path.strip_prefix("/agent/") { + if let Some(hotkey) = rest.strip_suffix("/journey") { + return handle_journey(hotkey); + } + if let Some(hotkey) = rest.strip_suffix("/logs") { + return handle_logs(hotkey); + } + if let Some(hotkey) = rest.strip_suffix("/code") { + return handle_code(hotkey); + } + } + } + Vec::new() + } + } +} + +fn handle_leaderboard() -> Vec { + let entries: Vec = host_storage_get(b"leaderboard") + .ok() + .and_then(|d| { + if d.is_empty() { + None + } else { + bincode::deserialize(&d).ok() + } + }) + .unwrap_or_default(); + bincode::serialize(&entries).unwrap_or_default() +} + +fn handle_stats() -> Vec { + let total_submissions = host_consensus_get_submission_count() as u64; + let epoch = host_consensus_get_epoch(); + let active_miners = host_storage_get(b"active_miner_count") + .ok() + .and_then(|d| { + if d.len() >= 8 { + let mut buf = [0u8; 8]; + buf.copy_from_slice(&d[..8]); + Some(u64::from_le_bytes(buf)) + } else { + None + } + }) + .unwrap_or(0); + let validator_count = host_storage_get(b"validator_count") + .ok() + .and_then(|d| { + if d.len() >= 8 { + let mut buf = [0u8; 8]; + buf.copy_from_slice(&d[..8]); + Some(u64::from_le_bytes(buf)) + } else { + None + } + }) + .unwrap_or(0); + + let stats = StatsResponse { + 
total_submissions, + active_miners, + validator_count, + }; + let _ = epoch; + bincode::serialize(&stats).unwrap_or_default() +} + +fn handle_decay() -> Vec { + let state: Option = scoring::get_top_agent_state(); + bincode::serialize(&state).unwrap_or_default() +} + +fn handle_dataset_history() -> Vec { + let history = dataset::get_dataset_history(); + bincode::serialize(&history).unwrap_or_default() +} + +fn handle_review(id: &str) -> Vec { + let result = llm_review::get_review_result(id); + bincode::serialize(&result).unwrap_or_default() +} + +fn handle_ast(id: &str) -> Vec { + let result = ast_validation::get_ast_result(id); + bincode::serialize(&result).unwrap_or_default() +} + +fn handle_submission_by_name(name: &str) -> Vec { + let result = submission::get_submission_by_name(name); + bincode::serialize(&result).unwrap_or_default() +} + +fn handle_journey(hotkey: &str) -> Vec { + let epoch = host_consensus_get_epoch(); + let current_epoch = if epoch >= 0 { epoch as u64 } else { 0 }; + let status = agent_storage::get_evaluation_status(hotkey, current_epoch); + bincode::serialize(&status).unwrap_or_default() +} + +fn handle_logs(hotkey: &str) -> Vec { + let epoch = host_consensus_get_epoch(); + let current_epoch = if epoch >= 0 { epoch as u64 } else { 0 }; + let logs = agent_storage::get_agent_logs(hotkey, current_epoch); + bincode::serialize(&logs).unwrap_or_default() +} + +fn handle_code(hotkey: &str) -> Vec { + let epoch = host_consensus_get_epoch(); + let current_epoch = if epoch >= 0 { epoch as u64 } else { 0 }; + agent_storage::get_agent_code(hotkey, current_epoch).unwrap_or_default() +} + +fn handle_get_timeout_config() -> Vec { + let config = timeout_handler::get_timeout_config(); + bincode::serialize(&config).unwrap_or_default() +} + +fn handle_set_timeout_config(body: &[u8]) -> Vec { + if let Ok(config) = bincode::deserialize::(body) { + let ok = timeout_handler::set_timeout_config(&config); + bincode::serialize(&ok).unwrap_or_default() + } else { + 
bincode::serialize(&false).unwrap_or_default() + } +} + +fn handle_get_whitelist_config() -> Vec { + let config = ast_validation::get_whitelist_config(); + bincode::serialize(&config).unwrap_or_default() +} + +fn handle_set_whitelist_config(body: &[u8]) -> Vec { + if let Ok(config) = bincode::deserialize::(body) { + let ok = ast_validation::set_whitelist_config(&config); + bincode::serialize(&ok).unwrap_or_default() + } else { + bincode::serialize(&false).unwrap_or_default() + } +} + +fn handle_dataset_consensus() -> Vec { + let result = dataset::check_dataset_consensus(); + bincode::serialize(&result).unwrap_or_default() +} + +fn handle_dataset_propose(body: &[u8]) -> Vec { + if let Ok((validator_id, indices)) = bincode::deserialize::<(String, Vec)>(body) { + let ok = dataset::propose_task_indices(&validator_id, &indices); + bincode::serialize(&ok).unwrap_or_default() + } else { + bincode::serialize(&false).unwrap_or_default() + } +} + +fn handle_dataset_random(body: &[u8]) -> Vec { + if let Ok((total_tasks, select_count)) = bincode::deserialize::<(u32, u32)>(body) { + let indices = dataset::generate_random_indices(total_tasks, select_count); + bincode::serialize(&indices).unwrap_or_default() + } else { + Vec::new() + } +} + +fn handle_review_select(body: &[u8]) -> Vec { + if let Ok((validators_json, submission_hash, offset)) = + bincode::deserialize::<(Vec, Vec, u8)>(body) + { + let reviewers = llm_review::select_reviewers(&validators_json, &submission_hash, offset); + bincode::serialize(&reviewers).unwrap_or_default() + } else { + Vec::new() + } +} + +fn handle_review_aggregate(body: &[u8]) -> Vec { + if let Ok(results) = bincode::deserialize::>(body) { + let aggregated = llm_review::aggregate_reviews(&results); + bincode::serialize(&aggregated).unwrap_or_default() + } else { + Vec::new() + } +} + +fn handle_timeout_record(body: &[u8]) -> Vec { + if let Ok((submission_id, validator, review_type)) = + bincode::deserialize::<(String, String, String)>(body) + { + 
let ok = timeout_handler::record_assignment(&submission_id, &validator, &review_type); + bincode::serialize(&ok).unwrap_or_default() + } else { + bincode::serialize(&false).unwrap_or_default() + } +} + +fn handle_timeout_check(body: &[u8]) -> Vec { + if let Ok((submission_id, validator, review_type, timeout_ms)) = + bincode::deserialize::<(String, String, String, u64)>(body) + { + let timed_out = + timeout_handler::check_timeout(&submission_id, &validator, &review_type, timeout_ms); + bincode::serialize(&timed_out).unwrap_or_default() + } else { + bincode::serialize(&false).unwrap_or_default() + } +} + +fn handle_timeout_replace(body: &[u8]) -> Vec { + if let Ok((validators, excluded, seed)) = + bincode::deserialize::<(Vec, Vec, Vec)>(body) + { + let replacement = timeout_handler::select_replacement(&validators, &excluded, &seed); + bincode::serialize(&replacement).unwrap_or_default() + } else { + bincode::serialize(&Option::::None).unwrap_or_default() + } +} + +fn handle_timeout_mark(body: &[u8]) -> Vec { + if let Ok((submission_id, validator, review_type)) = + bincode::deserialize::<(String, String, String)>(body) + { + let ok = timeout_handler::mark_timed_out(&submission_id, &validator, &review_type); + bincode::serialize(&ok).unwrap_or_default() + } else { + bincode::serialize(&false).unwrap_or_default() + } +} diff --git a/wasm/src/scoring.rs b/wasm/src/scoring.rs index 515efad0..f57e3343 100644 --- a/wasm/src/scoring.rs +++ b/wasm/src/scoring.rs @@ -1,7 +1,14 @@ use alloc::string::String; use core::fmt::Write as _; +use platform_challenge_sdk_wasm::host_functions::{ + host_consensus_get_epoch, host_storage_get, host_storage_set, +}; -use crate::types::{DecayParams, Difficulty, DifficultyStats, TaskDefinition, TaskResult}; +use crate::types::{ + DecayParams, Difficulty, DifficultyStats, TaskDefinition, TaskResult, TopAgentState, +}; + +const TOP_AGENT_KEY: &[u8] = b"top_agent_state"; pub struct AggregateScore { pub tasks_passed: u32, @@ -80,10 +87,6 @@ pub fn 
to_weight(score: &AggregateScore) -> f64 { } /// Apply decay to weight based on hours since top score. -/// -/// Note: This function is reserved for future use when decay mechanics are -/// integrated with the scoring system via challenge configuration. -#[allow(dead_code)] pub fn apply_decay(weight: f64, hours_since_top: f64, params: &DecayParams) -> f64 { let grace = params.grace_period_hours as f64; if hours_since_top <= grace { @@ -134,3 +137,60 @@ pub fn format_summary(score: &AggregateScore) -> String { let _ = write!(msg, " time={}ms", score.total_execution_time_ms); msg } + +pub fn get_top_agent_state() -> Option { + let data = host_storage_get(TOP_AGENT_KEY).ok()?; + if data.is_empty() { + return None; + } + bincode::deserialize(&data).ok() +} + +pub fn update_top_agent_state(agent_hash: &str, score: f64, epoch: u64) -> bool { + let current = get_top_agent_state(); + let should_update = match ¤t { + Some(state) => score > state.score, + None => true, + }; + + if should_update { + let state = TopAgentState { + agent_hash: String::from(agent_hash), + score, + achieved_epoch: epoch, + epochs_stale: 0, + decay_active: false, + current_burn_percent: 0.0, + }; + if let Ok(data) = bincode::serialize(&state) { + return host_storage_set(TOP_AGENT_KEY, &data).is_ok(); + } + } else if let Some(mut state) = current { + let current_epoch = host_consensus_get_epoch(); + if current_epoch >= 0 { + state.epochs_stale = (current_epoch as u64).saturating_sub(state.achieved_epoch); + let grace_epochs = 60u64; + state.decay_active = state.epochs_stale > grace_epochs; + if state.decay_active { + let decay_epochs = state.epochs_stale.saturating_sub(grace_epochs); + let half_life_epochs = 20.0f64; + let multiplier = 0.5f64.powf(decay_epochs as f64 / half_life_epochs); + state.current_burn_percent = (1.0 - multiplier) * 100.0; + } + if let Ok(data) = bincode::serialize(&state) { + let _ = host_storage_set(TOP_AGENT_KEY, &data); + } + } + } + false +} + +pub fn 
apply_epoch_decay(weight: f64, params: &DecayParams) -> f64 { + if let Some(state) = get_top_agent_state() { + if state.decay_active { + let multiplier = 1.0 - (state.current_burn_percent / 100.0); + return weight * multiplier.max(params.min_multiplier); + } + } + weight +} diff --git a/wasm/src/submission.rs b/wasm/src/submission.rs new file mode 100644 index 00000000..aa14d2cc --- /dev/null +++ b/wasm/src/submission.rs @@ -0,0 +1,102 @@ +use alloc::string::String; +use alloc::vec::Vec; +use platform_challenge_sdk_wasm::host_functions::{ + host_consensus_get_epoch, host_storage_get, host_storage_set, +}; + +use crate::types::{SubmissionName, SubmissionVersion}; + +pub fn register_submission_name(name: &str, hotkey: &str) -> bool { + let mut key = Vec::from(b"name_registry:" as &[u8]); + key.extend_from_slice(name.as_bytes()); + + if let Ok(data) = host_storage_get(&key) { + if !data.is_empty() { + if let Ok(existing) = bincode::deserialize::(&data) { + return existing.owner_hotkey == hotkey; + } + return false; + } + } + + let epoch = host_consensus_get_epoch(); + let entry = SubmissionName { + name: String::from(name), + owner_hotkey: String::from(hotkey), + registered_epoch: if epoch >= 0 { epoch as u64 } else { 0 }, + }; + if let Ok(data) = bincode::serialize(&entry) { + return host_storage_set(&key, &data).is_ok(); + } + false +} + +pub fn submit_versioned(name: &str, hotkey: &str, agent_hash: &str, epoch: u64) -> Option { + if !register_submission_name(name, hotkey) { + return None; + } + + let mut key = Vec::from(b"submission_versions:" as &[u8]); + key.extend_from_slice(hotkey.as_bytes()); + key.push(b':'); + key.extend_from_slice(name.as_bytes()); + + let mut versions: Vec = host_storage_get(&key) + .ok() + .and_then(|d| { + if d.is_empty() { + None + } else { + bincode::deserialize(&d).ok() + } + }) + .unwrap_or_default(); + + let next_version = versions.last().map(|v| v.version + 1).unwrap_or(1); + versions.push(SubmissionVersion { + version: 
next_version, + agent_hash: String::from(agent_hash), + epoch, + score: None, + }); + + if let Ok(data) = bincode::serialize(&versions) { + if host_storage_set(&key, &data).is_ok() { + return Some(next_version); + } + } + None +} + +pub fn get_submission_history(hotkey: &str, name: &str) -> Vec { + let mut key = Vec::from(b"submission_versions:" as &[u8]); + key.extend_from_slice(hotkey.as_bytes()); + key.push(b':'); + key.extend_from_slice(name.as_bytes()); + + host_storage_get(&key) + .ok() + .and_then(|d| { + if d.is_empty() { + None + } else { + bincode::deserialize(&d).ok() + } + }) + .unwrap_or_default() +} + +pub fn get_submission_by_name(name: &str) -> Option<(String, SubmissionVersion)> { + let mut key = Vec::from(b"name_registry:" as &[u8]); + key.extend_from_slice(name.as_bytes()); + + let data = host_storage_get(&key).ok()?; + if data.is_empty() { + return None; + } + let entry: SubmissionName = bincode::deserialize(&data).ok()?; + + let versions = get_submission_history(&entry.owner_hotkey, name); + let latest = versions.last()?.clone(); + Some((entry.owner_hotkey, latest)) +} diff --git a/wasm/src/timeout_handler.rs b/wasm/src/timeout_handler.rs new file mode 100644 index 00000000..b98e6445 --- /dev/null +++ b/wasm/src/timeout_handler.rs @@ -0,0 +1,102 @@ +use alloc::string::String; +use alloc::vec::Vec; +use platform_challenge_sdk_wasm::host_functions::{ + host_get_timestamp, host_storage_get, host_storage_set, +}; + +use crate::types::TimeoutConfig; + +pub fn get_timeout_config() -> TimeoutConfig { + host_storage_get(b"timeout_config") + .ok() + .and_then(|d| { + if d.is_empty() { + None + } else { + bincode::deserialize(&d).ok() + } + }) + .unwrap_or_default() +} + +pub fn set_timeout_config(config: &TimeoutConfig) -> bool { + if let Ok(data) = bincode::serialize(config) { + return host_storage_set(b"timeout_config", &data).is_ok(); + } + false +} + +pub fn record_assignment(submission_id: &str, validator: &str, review_type: &str) -> bool { + let 
mut key = Vec::from(b"review_assignment:" as &[u8]); + key.extend_from_slice(submission_id.as_bytes()); + key.push(b':'); + key.extend_from_slice(review_type.as_bytes()); + key.push(b':'); + key.extend_from_slice(validator.as_bytes()); + + let timestamp = host_get_timestamp(); + host_storage_set(&key, ×tamp.to_le_bytes()).is_ok() +} + +pub fn check_timeout( + submission_id: &str, + validator: &str, + review_type: &str, + timeout_ms: u64, +) -> bool { + let mut key = Vec::from(b"review_assignment:" as &[u8]); + key.extend_from_slice(submission_id.as_bytes()); + key.push(b':'); + key.extend_from_slice(review_type.as_bytes()); + key.push(b':'); + key.extend_from_slice(validator.as_bytes()); + + if let Ok(data) = host_storage_get(&key) { + if data.len() >= 8 { + let mut buf = [0u8; 8]; + buf.copy_from_slice(&data[..8]); + let assigned_time = i64::from_le_bytes(buf); + let current_time = host_get_timestamp(); + let elapsed = (current_time - assigned_time) as u64; + return elapsed > timeout_ms; + } + } + false +} + +pub fn select_replacement( + validators: &[String], + excluded: &[String], + seed: &[u8], +) -> Option { + let available: Vec<&String> = validators + .iter() + .filter(|v| !excluded.iter().any(|e| e == *v)) + .collect(); + + if available.is_empty() { + return None; + } + + let idx = if seed.len() >= 4 { + let mut buf = [0u8; 4]; + buf.copy_from_slice(&seed[..4]); + u32::from_le_bytes(buf) as usize % available.len() + } else { + 0 + }; + + Some(available[idx].clone()) +} + +pub fn mark_timed_out(submission_id: &str, validator: &str, review_type: &str) -> bool { + let mut key = Vec::from(b"review_timeout:" as &[u8]); + key.extend_from_slice(submission_id.as_bytes()); + key.push(b':'); + key.extend_from_slice(review_type.as_bytes()); + key.push(b':'); + key.extend_from_slice(validator.as_bytes()); + + let timestamp = host_get_timestamp(); + host_storage_set(&key, ×tamp.to_le_bytes()).is_ok() +} diff --git a/wasm/src/types.rs b/wasm/src/types.rs index 
3c4e0520..1425ac40 100644 --- a/wasm/src/types.rs +++ b/wasm/src/types.rs @@ -120,3 +120,210 @@ pub struct RouteDefinition { pub path: String, pub description: String, } + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SubmissionName { + pub name: String, + pub owner_hotkey: String, + pub registered_epoch: u64, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SubmissionVersion { + pub version: u32, + pub agent_hash: String, + pub epoch: u64, + pub score: Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct LlmReviewResult { + pub approved: bool, + pub reason: String, + pub violations: Vec, + pub reviewer_validators: Vec, + pub scores: Vec, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct AstReviewResult { + pub passed: bool, + pub violations: Vec, + pub reviewer_validators: Vec, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum EvaluationStatus { + Pending, + LlmReview, + AstReview, + Evaluating, + Completed, + Failed, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct TopAgentState { + pub agent_hash: String, + pub score: f64, + pub achieved_epoch: u64, + pub epochs_stale: u64, + pub decay_active: bool, + pub current_burn_percent: f64, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct LeaderboardEntry { + pub rank: u32, + pub hotkey: String, + pub score: f64, + pub pass_rate: f64, + pub submissions: u32, + pub last_epoch: u64, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct StatsResponse { + pub total_submissions: u64, + pub active_miners: u64, + pub validator_count: u64, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct TimeoutConfig { + pub evaluation_timeout_ms: u64, + pub llm_review_timeout_ms: u64, + pub ast_review_timeout_ms: u64, +} + +impl Default for TimeoutConfig { + fn default() -> Self { + Self { + evaluation_timeout_ms: 6 * 60 * 60 * 1000, + llm_review_timeout_ms: 3 * 60 * 1000, + 
ast_review_timeout_ms: 60 * 1000, + } + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct WhitelistConfig { + pub allowed_stdlib: Vec, + pub allowed_third_party: Vec, + pub forbidden_builtins: Vec, + pub max_code_size: usize, +} + +impl Default for WhitelistConfig { + fn default() -> Self { + use alloc::string::ToString; + Self { + allowed_stdlib: [ + "json", + "re", + "math", + "random", + "collections", + "itertools", + "functools", + "operator", + "string", + "textwrap", + "datetime", + "time", + "copy", + "pprint", + "typing", + "dataclasses", + "enum", + "abc", + "contextlib", + "warnings", + "bisect", + "heapq", + "array", + "types", + "decimal", + "fractions", + "statistics", + "hashlib", + "hmac", + "secrets", + "base64", + "binascii", + "struct", + "codecs", + "io", + "pathlib", + "argparse", + "logging", + "traceback", + "difflib", + "uuid", + "html", + "csv", + "os", + "sys", + "shutil", + "glob", + "subprocess", + ] + .iter() + .map(|s| s.to_string()) + .collect(), + allowed_third_party: [ + "term_sdk", + "numpy", + "pandas", + "scipy", + "sklearn", + "torch", + "tensorflow", + "transformers", + "openai", + "anthropic", + "httpx", + "aiohttp", + "requests", + "pydantic", + "rich", + "tqdm", + "litellm", + ] + .iter() + .map(|s| s.to_string()) + .collect(), + forbidden_builtins: ["exec", "eval", "compile", "__import__"] + .iter() + .map(|s| s.to_string()) + .collect(), + max_code_size: 1_048_576, + } + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct LlmMessage { + pub role: String, + pub content: String, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct LlmRequest { + pub model: String, + pub messages: Vec, + pub max_tokens: u32, + pub temperature: f64, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct LlmResponse { + pub content: String, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct WasmRouteRequest { + pub method: String, + pub path: String, + pub body: Vec, +} 
From abe011278d077070b9ee23d7aa5f1375cce71de5 Mon Sep 17 00:00:00 2001 From: echobt Date: Wed, 18 Feb 2026 20:29:49 +0000 Subject: [PATCH 3/3] docs(validator): add validator setup and operations guide --- docs/validator/setup.md | 356 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) create mode 100644 docs/validator/setup.md diff --git a/docs/validator/setup.md b/docs/validator/setup.md new file mode 100644 index 00000000..3b233906 --- /dev/null +++ b/docs/validator/setup.md @@ -0,0 +1,356 @@ +# Validator Setup Guide + +This guide covers setting up and operating a validator node for the Term Challenge subnet on the Platform-v2 network. + +--- + +## Hardware Requirements + +| Resource | Minimum | Recommended | Notes | +| --- | --- | --- | --- | +| CPU | 4 vCPU | 8 vCPU | WASM execution is CPU-bound | +| RAM | 16 GB | 32 GB | WASM runtime + P2P state | +| Storage | 250 GB SSD | 500 GB NVMe | Agent storage grows over time | +| Network | 100 Mbps | 100 Mbps | P2P mesh requires stable connectivity | +| OS | Ubuntu 22.04+ | Ubuntu 24.04 | Any Linux with glibc 2.35+ | + +--- + +## Software Prerequisites + +| Software | Version | Purpose | +| --- | --- | --- | +| Rust | 1.90+ | Building validator-node and WASM modules | +| Git | 2.30+ | Source code management | +| OpenSSL | 3.0+ | TLS for Bittensor RPC connections | +| `btcli` | Latest | Bittensor key management and registration | + +### Install Rust + +```bash +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +source $HOME/.cargo/env +rustup target add wasm32-unknown-unknown +``` + +### Install btcli + +```bash +pip install bittensor +``` + +--- + +## Bittensor Prerequisites + +1. **Generate a hotkey** (if you don't have one): + ```bash + btcli wallet new_hotkey --wallet.name my_validator --wallet.hotkey default + ``` + +2. **Register on the subnet**: + ```bash + btcli subnet register --netuid <NETUID> --wallet.name my_validator --wallet.hotkey default + ``` + +3. 
**Stake TAO** (minimum 1000 TAO required): + ```bash + btcli stake add --wallet.name my_validator --wallet.hotkey default --amount 1000 + ``` + +--- + +## Installation + +### 1. Clone Platform-v2 + +```bash +git clone https://github.com/PlatformNetwork/platform-v2.git +cd platform-v2 +``` + +### 2. Configure Environment + +```bash +cp .env.example .env +``` + +Edit `.env` with your validator configuration: + +```bash +# REQUIRED: Your validator secret key (BIP39 mnemonic or hex-encoded 32 bytes) +VALIDATOR_SECRET_KEY=your_secret_key_here + +# Optional: Slack webhook for monitoring notifications +# SLACK_WEBHOOK_URL=https://hooks.slack.com/services/xxx/xxx/xxx +``` + +### 3. Build the Validator + +```bash +cargo build --release --bin validator-node +``` + +### 4. Create Data Directory + +```bash +mkdir -p data +``` + +--- + +## Configuration + +### Environment Variables + +| Variable | Description | Default | Required | +| --- | --- | --- | --- | +| `VALIDATOR_SECRET_KEY` | BIP39 mnemonic or hex private key | — | Yes | +| `SUBTENSOR_ENDPOINT` | Bittensor RPC endpoint | `wss://entrypoint-finney.opentensor.ai:443` | No | +| `NETUID` | Subnet UID | `100` | No | +| `DATA_DIR` | Directory for validator state | `./data` | No | +| `RPC_PORT` | JSON-RPC API port | `8545` | No | +| `P2P_PORT` | libp2p mesh port | `9000` | No | +| `LOG_LEVEL` | Logging verbosity | `info` | No | +| `SLACK_WEBHOOK_URL` | Slack notifications webhook | — | No | + +### Network Ports + +| Port | Protocol | Usage | Required | +| --- | --- | --- | --- | +| 9000/tcp | libp2p | Validator P2P mesh communication | Yes | +| 8545/tcp | HTTP | JSON-RPC API for CLI and miners | Optional | + +Ensure these ports are open in your firewall: + +```bash +# UFW example +sudo ufw allow 9000/tcp +sudo ufw allow 8545/tcp +``` + +--- + +## Running a Validator Node + +### Direct Execution + +```bash +./target/release/validator-node \ + --data-dir ./data \ + --secret-key "${VALIDATOR_SECRET_KEY}" +``` + +### With systemd 
(Recommended for Production) + +Create `/etc/systemd/system/platform-validator.service`: + +```ini +[Unit] +Description=Platform-v2 Validator Node +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=validator +Group=validator +WorkingDirectory=/opt/platform-v2 +ExecStart=/opt/platform-v2/target/release/validator-node --data-dir /opt/platform-v2/data --secret-key "${VALIDATOR_SECRET_KEY}" +Restart=always +RestartSec=10 +LimitNOFILE=65535 +EnvironmentFile=/opt/platform-v2/.env + +[Install] +WantedBy=multi-user.target +``` + +Enable and start: + +```bash +sudo systemctl daemon-reload +sudo systemctl enable platform-validator +sudo systemctl start platform-validator +``` + +--- + +## WASM Module Management + +The validator automatically loads WASM challenge modules. To update the term-challenge module: + +### Build the WASM Module + +```bash +# In the term-challenge repository +cargo build --release --target wasm32-unknown-unknown -p term-challenge-wasm + +# Copy to the validator's challenge directory +cp target/wasm32-unknown-unknown/release/term_challenge_wasm.wasm \ + /opt/platform-v2/data/challenges/ +``` + +### Download via Platform CLI + +```bash +platform download term-challenge +``` + +--- + +## Monitoring and Health Checks + +### Health Endpoint + +```bash +curl http://localhost:8545/health +``` + +Expected response: + +```json +{ + "success": true, + "data": { + "status": "healthy", + "version": "0.1.0", + "uptime_secs": 86400 + } +} +``` + +### Status Endpoint + +```bash +curl http://localhost:8545/status +``` + +Returns current block height, epoch, validator count, and challenge count. + +### Epoch Information + +```bash +curl http://localhost:8545/epoch +``` + +Returns current epoch, phase (evaluation/commit/reveal), and blocks until next phase. 
+ +### Using term-cli + +```bash +# Monitor network health +term-cli --rpc-url http://localhost:8545 --tab network + +# View leaderboard +term-cli --rpc-url http://localhost:8545 --tab leaderboard +``` + +### Log Monitoring + +```bash +# Follow validator logs +journalctl -u platform-validator -f + +# Filter for errors +journalctl -u platform-validator --since "1 hour ago" | grep -i error +``` + +### Key Metrics to Monitor + +| Metric | Healthy Range | Action if Unhealthy | +| --- | --- | --- | +| Uptime | > 99% | Check systemd restart logs | +| Peer count | ≥ 3 | Verify P2P port is open | +| Block height | Increasing | Check Bittensor RPC connectivity | +| Epoch progression | Advancing | Verify chain sync | +| Memory usage | < 80% of available | Increase RAM or check for leaks | +| Disk usage | < 80% of available | Prune old data or expand storage | + +--- + +## Validator Responsibilities + +As a Term Challenge validator, your node performs these duties: + +1. **Submission Validation** — Run WASM `validate()` on incoming submissions +2. **Security Review** — Perform LLM and AST reviews when assigned +3. **Agent Evaluation** — Execute agents against SWE-bench tasks via term-executor +4. **Log Consensus** — Propose and vote on agent evaluation logs +5. **Weight Submission** — Submit consensus weights to Bittensor at epoch boundaries +6. 
**State Sync** — Maintain synchronized state with other validators via P2P + +--- + +## Troubleshooting + +### Validator Not Connecting to Peers + +| Symptom | Cause | Solution | +| --- | --- | --- | +| 0 peers | Firewall blocking P2P port | Open port 9000/tcp | +| 0 peers | Incorrect boot nodes | Verify network configuration | +| Peers dropping | Unstable network | Check bandwidth and latency | +| Peers dropping | Clock skew | Sync system clock with NTP | + +### Bittensor Sync Issues + +| Symptom | Cause | Solution | +| --- | --- | --- | +| Block height not advancing | RPC endpoint down | Try alternate `SUBTENSOR_ENDPOINT` | +| Stake not detected | Registration not confirmed | Verify with `btcli wallet overview` | +| Weights not submitted | Insufficient stake | Ensure minimum 1000 TAO staked | + +### WASM Module Issues + +| Symptom | Cause | Solution | +| --- | --- | --- | +| Challenge not loading | Missing WASM file | Rebuild and copy the `.wasm` file | +| Evaluation failures | Outdated WASM module | Update to latest term-challenge version | +| High memory usage | Large submissions | Monitor and set memory limits | + +### Common Log Messages + +| Log Message | Meaning | Action | +| --- | --- | --- | +| `Validator sync complete` | Successfully synced from metagraph | Normal operation | +| `Submission validated` | A submission passed WASM validation | Normal operation | +| `Epoch transition` | New epoch started | Normal operation | +| `Weight submission failed` | Could not submit weights to chain | Check Bittensor connectivity | +| `Review assignment received` | Assigned to review a submission | Normal operation | +| `Review timeout` | Did not complete review in time | Check system resources | + +--- + +## Security Considerations + +- **Never share your `VALIDATOR_SECRET_KEY`** — it controls your validator identity and stake +- **Keep the `.env` file permissions restricted**: `chmod 600 .env` +- **Run as a non-root user** — create a dedicated `validator` user +- 
**Enable automatic updates** for OS security patches +- **Monitor for unauthorized access** to the RPC port (consider binding it to localhost if external access is not needed) +- **Back up your data directory** regularly — it contains validator state and consensus data + +--- + +## Upgrading + +### Update Platform-v2 + +```bash +cd /opt/platform-v2 +git pull origin main +cargo build --release --bin validator-node +sudo systemctl restart platform-validator +``` + +### Update Term Challenge WASM + +```bash +cd /opt/term-challenge +git pull origin main +cargo build --release --target wasm32-unknown-unknown -p term-challenge-wasm +cp target/wasm32-unknown-unknown/release/term_challenge_wasm.wasm \ + /opt/platform-v2/data/challenges/ +sudo systemctl restart platform-validator +```