diff --git a/definitions/minigrid.py b/definitions/minigrid.py
new file mode 100644
index 00000000..cb57ce32
--- /dev/null
+++ b/definitions/minigrid.py
@@ -0,0 +1,311 @@
+"""
+MiniGrid Definitions for GenESIS Framework
+
+Provides environment descriptions, action spaces, and other metadata
+for the MiniGrid/GridWorld evaluation domain.
+"""
+
+import numpy as np
+
+
+class MiniGridDefinitions:
+    """
+    Definitions for MiniGrid gridworld environments.
+
+    Follows the same structure as ProcGenDefinitions for consistency
+    with the GenESIS evaluation framework.
+    """
+
+    # Environment descriptions by tier
+    DESCRIPTIONS = {
+        # Tier 1: Pure Navigation
+        "tier1": {
+            "navigate to the goal": [
+                "Navigate through the grid to reach the goal position.",
+                "Avoid obstacles and find the shortest path.",
+                "The green square marks the goal location.",
+            ]
+        },
+        "tier1_maze_simple": {
+            "navigate to the goal": [
+                "Navigate through an empty room to reach the goal.",
+                "The green square marks the goal location.",
+            ]
+        },
+        "tier1_maze_corridor": {
+            "navigate through corridor to goal": [
+                "Navigate through a corridor with walls.",
+                "Find a path around obstacles to reach the goal.",
+                "The green square marks the goal location.",
+            ]
+        },
+        "tier1_maze_rooms": {
+            "navigate through rooms to goal": [
+                "Navigate through connected rooms.",
+                "Pass through doorways to reach the goal.",
+                "The green square marks the goal location.",
+            ]
+        },
+
+        # Tier 2: Linear Dependencies (Keys + Doors)
+        "tier2": {
+            "collect key and unlock door": [
+                "Collect the key to unlock the matching colored door.",
+                "Navigate to the goal after opening the door.",
+                "Match key colors to door colors.",
+            ]
+        },
+        "tier2_single_key": {
+            "collect key to unlock door": [
+                "Find and collect the key.",
+                "Use the key to unlock the matching door.",
+                "Navigate through the door to reach the goal.",
+            ]
+        },
+        "tier2_multi_key": {
+            "collect keys in order": [
+                "Multiple keys and doors block your path.",
+                "Collect keys in the correct order to progress.",
+                "Each key unlocks a door of the same color.",
+            ]
+        },
+        "tier2_colored_doors": {
+            "match keys to colored doors": [
+                "Multiple colored keys and doors.",
+                "Match each key to its corresponding door color.",
+                "Navigate through unlocked doors to reach the goal.",
+            ]
+        },
+
+        # Tier 3: Multi-Mechanism (Keys + Doors + Switches + Gates)
+        "tier3": {
+            "use keys switches and gates": [
+                "Combine key collection with switch activation.",
+                "Switches control gates that block passages.",
+                "Keys unlock doors, switches open gates.",
+            ]
+        },
+        "tier3_key_switch": {
+            "use key then switch": [
+                "First collect the key to unlock the door.",
+                "Then activate the switch to open the gate.",
+                "Navigate to the goal through opened passages.",
+            ]
+        },
+        "tier3_gates_switches": {
+            "activate switches to open gates": [
+                "Multiple switches control multiple gates.",
+                "Activate switches in the correct order.",
+                "Navigate through opened gates to the goal.",
+            ]
+        },
+        "tier3_complex_deps": {
+            "complex mechanism dependencies": [
+                "Keys, doors, switches, and gates interact.",
+                "Solve the dependency chain to reach the goal.",
+                "Some mechanisms may need to be activated in order.",
+            ]
+        },
+
+        # Tier 4: Irreversibility (Pushable blocks, consumables)
+        "tier4": {
+            "push blocks and use resources wisely": [
+                "Some actions cannot be undone.",
+                "Pushing blocks into corners may block progress.",
+                "Keys are consumed when used on doors.",
+            ]
+        },
+        "tier4_push_block": {
+            "push block to clear path": [
+                "Push the block out of the way.",
+                "Be careful - blocks can only be pushed, not pulled.",
+                "Plan your moves to avoid getting stuck.",
+            ]
+        },
+        "tier4_blocked_path": {
+            "push blocks strategically": [
+                "Multiple blocks need to be moved.",
+                "Wrong moves may permanently block paths.",
+                "Think ahead before pushing.",
+            ]
+        },
+        "tier4_consumable": {
+            "use limited resources wisely": [
+                "Keys are consumed when used.",
+                "Choose which doors to open carefully.",
+                "You may not have enough keys for all doors.",
+            ]
+        },
+
+        # Tier 5: Hidden Information
+        "tier5": {
+            "discover hidden rules": [
+                "Some mechanisms have hidden effects.",
+                "Experiment to discover how things work.",
+                "Information must be inferred from observation.",
+            ]
+        },
+        "tier5_hidden_switch": {
+            "find the hidden switch effect": [
+                "A switch controls a gate, but the connection is hidden.",
+                "Try interacting to discover what controls what.",
+                "Use trial and error to find the solution.",
+            ]
+        },
+        "tier5_infer_color": {
+            "infer the correct key color": [
+                "The door's required key color is not visible.",
+                "Try different keys to find which one works.",
+                "Only one key will open the door.",
+            ]
+        },
+        "tier5_memory": {
+            "remember visited locations": [
+                "Partial observability limits your view.",
+                "Remember where you've been and what you've seen.",
+                "Use memory to navigate efficiently.",
+            ]
+        },
+
+        # Default fallback
+        "default": {
+            "default": [
+                "Navigate the gridworld environment.",
+                "Use available actions to reach your goal.",
+                "Interact with objects as needed.",
+            ]
+        },
+    }
+
+    # Action space definitions (7 discrete actions)
+    movement_actions = {
+        0: "Turn left (rotate 90° counter-clockwise)",
+        1: "Turn right (rotate 90° clockwise)",
+        2: "Move forward (one cell in facing direction)",
+    }
+
+    interaction_actions = {
+        3: "Pick up (grab object directly in front)",
+        4: "Drop (release currently held object)",
+        5: "Toggle (interact with door, switch, or object in front)",
+        6: "Done/Wait (no operation, stay in place)",
+    }
+
+    ACTION_SPACES = {
+        # Tier 1: Navigation only
+        "tier1": {
+            "default": {
+                0: ("Movement action", movement_actions),
+            }
+        },
+        # Tier 2+: Full action space
+        "default": {
+            "default": {
+                0: ("Action", {**movement_actions, **interaction_actions}),
+            }
+        },
+        "tier2": {
+            "default": {
+                0: ("Action", {**movement_actions, **interaction_actions}),
+            }
+        },
+        "tier3": {
+            "default": {
+                0: ("Action", {**movement_actions, **interaction_actions}),
+            }
+        },
+        "tier4": {
+            "default": {
+                0: ("Action", {**movement_actions, **interaction_actions}),
+            }
+        },
+        "tier5": {
+            "default": {
+                0: ("Action", {**movement_actions, **interaction_actions}),
+            }
+        },
+    }
+
+    ACTION_EXCLUSIVENESS = {
+        "default": {
+            "default": True  # Only one action at a time
+        }
+    }
+
+    ADDITIONAL_INSTRUCTIONS = {
+        "tier1": {
+            "default": "Focus on navigation - use turn_left, turn_right, and move_forward to reach the green goal square."
+        },
+        "tier2": {
+            "default": "Collect keys (pickup action when facing key) and use them on matching colored doors (toggle action when facing door)."
+        },
+        "tier3": {
+            "default": "Use toggle action on switches to open gates. Combine with key/door mechanics to reach the goal."
+        },
+        "tier4": {
+            "default": "Be careful with irreversible actions. Pushing blocks into walls cannot be undone. Keys are consumed when used."
+        },
+        "tier5": {
+            "default": "Some information is hidden. Experiment with interactions to discover how mechanisms work."
+        },
+        "default": {
+            "default": None
+        }
+    }
+
+    ACTION_DECODE_STRATEGIES = {
+        "default": "single_discrete"
+    }
+
+    @staticmethod
+    def get_valid_action_space(tier: int = 2) -> list[int]:
+        """
+        Get the valid action IDs for a given difficulty tier.
+
+        Args:
+            tier: Difficulty tier (1-5)
+
+        Returns:
+            List of valid action IDs
+        """
+        if tier == 1:
+            # Navigation only
+            return [0, 1, 2, 6]  # turn_left, turn_right, forward, wait
+        else:
+            # Full action space
+            return list(range(7))
+
+    @staticmethod
+    def get_action_description(action_id: int) -> str:
+        """
+        Get human-readable description for an action.
+
+        Args:
+            action_id: Action ID (0-6)
+
+        Returns:
+            Action description string
+        """
+        all_actions = {
+            **MiniGridDefinitions.movement_actions,
+            **MiniGridDefinitions.interaction_actions
+        }
+        return all_actions.get(action_id, f"Unknown action {action_id}")
+
+    @staticmethod
+    def clip_action_to_valid(action: int, tier: int = 2) -> int:
+        """
+        Clip an action to the valid action space for a tier.
+
+        Args:
+            action: The predicted action
+            tier: Difficulty tier
+
+        Returns:
+            Valid action ID (defaults to wait/done if invalid)
+        """
+        valid_actions = MiniGridDefinitions.get_valid_action_space(tier)
+        if action in valid_actions:
+            return action
+        # Default to wait action
+        return 6
diff --git a/definitions/minigrid_prompt.py b/definitions/minigrid_prompt.py
new file mode 100644
index 00000000..132054f4
--- /dev/null
+++ b/definitions/minigrid_prompt.py
@@ -0,0 +1,163 @@
+"""
+MiniGrid Prompt Template for VLM Evaluation
+
+Formats instruction prompts for the gridworld evaluation domain.
+"""
+
+INSTRUCTION = [
+    "You are controlling an agent in a gridworld puzzle.",
+    "The environment is \"{env_name}\".",
+    "Task: {env_desc}",
+    "You see a top-down view of the grid. The agent is shown as a red triangle pointing in its facing direction.",
+    "Walls are grey, floors are light colored, and the goal is marked in green.",
+    "Objects: Keys are small colored shapes, doors are colored rectangles, switches are yellow circles.",
+    "The available actions are: {action_desc}",
+    "Output format: {output_format}",
+    "Respond with ONLY the action output, no explanations.",
+    "{additional_inst}"
+]
+
+
+def format_instruction_prompt(
+    env_name: str,
+    env_desc: str,
+    action_space: dict,
+    only_one_action: bool,
+    additional_inst: str = None
+) -> str:
+    """
+    Format the instruction prompt for VLM evaluation.
+
+    Args:
+        env_name: Name of the environment/task
+        env_desc: Description of the task objectives
+        action_space: Dictionary defining the action space
+        only_one_action: Whether only one action should be selected
+        additional_inst: Additional instructions to append
+
+    Returns:
+        Formatted instruction prompt string
+    """
+    instruction_format = ' '.join(INSTRUCTION)
+
+    # Format action descriptions
+    actions = []
+    for idx, tup in action_space.items():
+        if len(tup) == 2:  # Discrete action with options
+            desc, options = tup
+            if isinstance(options, dict):
+                # Format options as ID: Description pairs
+                opts_str = ", ".join([f"{k}: {v}" for k, v in options.items()])
+                sent = f"Action options: {opts_str}"
+            else:
+                sent = f"{idx}. {desc} => Options: {options}"
+        else:
+            sent = f"{idx}. {tup}"
+        actions.append(sent)
+
+    action_desc = '\n'.join(actions)
+
+    # Determine output format
+    if only_one_action:
+        output_format = (
+            "A single integer representing the action ID (0-6). "
+            "For example: 2 (to move forward)"
+        )
+    else:
+        output_format = (
+            "A list of action IDs. For example: [2] for a single forward move, "
+            "or [0, 2] for turn left then move forward."
+        )
+
+    # Build final prompt
+    if additional_inst is not None and additional_inst.strip():
+        prompt = instruction_format.format(
+            env_name=env_name,
+            env_desc=env_desc,
+            action_desc=action_desc,
+            output_format=output_format,
+            additional_inst=additional_inst
+        )
+    else:
+        prompt = instruction_format.format(
+            env_name=env_name,
+            env_desc=env_desc,
+            action_desc=action_desc,
+            output_format=output_format,
+            additional_inst=""
+        )
+
+    return prompt
+
+
+def format_simple_prompt(
+    task_description: str,
+    tier: int = 2,
+    include_action_space: bool = True
+) -> str:
+    """
+    Format a simplified prompt for quick evaluation.
+
+    Args:
+        task_description: Brief task description
+        tier: Difficulty tier (1-5)
+        include_action_space: Whether to include action space info
+
+    Returns:
+        Formatted prompt string
+    """
+    prompt_parts = [
+        "You are an agent in a gridworld puzzle.",
+        f"Task: {task_description}",
+        "The image shows your current view of the grid.",
+        "The red triangle is you (pointing in your facing direction).",
+        "Green square is the goal. Grey cells are walls.",
+    ]
+
+    if include_action_space:
+        if tier == 1:
+            prompt_parts.append(
+                "Actions: 0=turn left, 1=turn right, 2=move forward, 6=wait"
+            )
+        else:
+            prompt_parts.append(
+                "Actions: 0=turn left, 1=turn right, 2=move forward, "
+                "3=pickup, 4=drop, 5=toggle/interact, 6=wait"
+            )
+
+    prompt_parts.append("Output: A single integer (0-6) for your next action.")
+
+    return " ".join(prompt_parts)
+
+
+def format_observation_context(
+    agent_pos: tuple[int, int],
+    agent_dir: int,
+    carrying: str = None,
+    visible_objects: list[str] = None
+) -> str:
+    """
+    Format contextual information about the current observation.
+
+    Args:
+        agent_pos: Agent's (x, y) position
+        agent_dir: Agent's facing direction (0=right, 1=down, 2=left, 3=up)
+        carrying: What the agent is carrying (if anything)
+        visible_objects: List of visible object descriptions
+
+    Returns:
+        Context string to append to prompt
+    """
+    dir_names = {0: "right", 1: "down", 2: "left", 3: "up"}
+    context_parts = [
+        f"Agent position: ({agent_pos[0]}, {agent_pos[1]})",
+        f"Facing: {dir_names.get(agent_dir, 'unknown')}"
+    ]
+
+    if carrying:
+        context_parts.append(f"Carrying: {carrying}")
+
+    if visible_objects:
+        context_parts.append(f"Visible objects: {', '.join(visible_objects)}")
+
+    return " | ".join(context_parts)
diff --git a/src/config.json b/src/config.json
index 5c27d34f..ef73748a 100644
--- a/src/config.json
+++ b/src/config.json
@@ -23,7 +23,8 @@
         "language_table": "control",
         "openx": "control",
         "locomujoco": "control",
-        "overcooked_ai": "control"
+        "overcooked_ai": "control",
+        "minigrid": "control"
     },
     "models": {
         "gpt-5-chat-latest": ["vlm", "openai"],
diff --git a/src/data_utils/minigrid_dataloader.py b/src/data_utils/minigrid_dataloader.py
new file mode 100644
index 00000000..ff17eb33
--- /dev/null
+++ b/src/data_utils/minigrid_dataloader.py
@@ -0,0 +1,364 @@
+"""
+MiniGrid DataLoader for GenESIS Evaluation
+
+Provides PyTorch Dataset and DataLoader for MiniGrid gridworld tasks.
+"""
+
+from torch.utils.data import Dataset, DataLoader
+from typing import List, Dict, Any, Optional
+from collections import defaultdict
+from pathlib import Path
+import json
+import numpy as np
+import sys
+
+# Add paths for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+sys.path.insert(0, str(Path(__file__).parent.parent / "v1_1"))
+
+from definitions.minigrid import MiniGridDefinitions
+
+
+class MiniGridDataset(Dataset):
+    """
+    PyTorch Dataset for MiniGrid gridworld tasks.
+
+    Loads task specifications and generates observations on-the-fly
+    by running episodes with the MiniGrid backend.
+    """
+
+    def __init__(
+        self,
+        task_files: List[str],
+        dataset_name: str = "minigrid",
+        by_episode: bool = False,
+        max_steps_per_episode: Optional[int] = None,
+        render_mode: str = "rgb_array",
+    ):
+        """
+        Initialize the MiniGrid dataset.
+
+        Args:
+            task_files: List of paths to task JSON files
+            dataset_name: Name for this dataset (e.g., "tier1", "tier2")
+            by_episode: If True, each item is a full episode; if False, each item is a step
+            max_steps_per_episode: Optional limit on steps per episode
+            render_mode: Rendering mode for observations
+        """
+        self.task_files = task_files
+        self.dataset_name = dataset_name
+        self.by_episode = by_episode
+        self.max_steps_per_episode = max_steps_per_episode
+        self.render_mode = render_mode
+
+        self._action_stats = None
+        self._episodes_cache = {}
+        self._step_index = []  # (task_idx, step_idx) for step-level access
+
+        # Pre-compute step index if needed
+        if not by_episode:
+            self._build_step_index()
+
+    def _build_step_index(self):
+        """Build index mapping flat indices to (task, step) pairs."""
+        for task_idx, task_file in enumerate(self.task_files):
+            # Load task to get max_steps
+            spec = self._load_task_spec(task_file)
+            max_steps = spec.get("max_steps", 100)
+            if self.max_steps_per_episode:
+                max_steps = min(max_steps, self.max_steps_per_episode)
+
+            for step_idx in range(max_steps):
+                self._step_index.append((task_idx, step_idx))
+
+    def _load_task_spec(self, path: str) -> dict:
+        """Load task specification from JSON file."""
+        with open(path, "r") as f:
+            data = json.load(f)
+        if "TaskSpecification" in data:
+            return data["TaskSpecification"]
+        return data
+
+    def _generate_episode(self, task_idx: int) -> List[Dict[str, Any]]:
+        """
+        Generate episode data by running the task.
+
+        Args:
+            task_idx: Index of the task file
+
+        Returns:
+            List of step data dictionaries
+        """
+        if task_idx in self._episodes_cache:
+            return self._episodes_cache[task_idx]
+
+        # Import here to avoid circular imports
+        from v1_1.minigrid.task_spec import TaskSpecification
+        from v1_1.minigrid.backends.minigrid_backend import MiniGridBackend
+
+        # Load task specification
+        spec_dict = self._load_task_spec(self.task_files[task_idx])
+        spec = TaskSpecification.from_dict(spec_dict)
+
+        # Create backend and run episode with random policy
+        backend = MiniGridBackend(render_mode=self.render_mode)
+        backend.configure(spec)
+
+        obs, state, info = backend.reset(seed=spec.seed)
+        mission = backend.get_mission_text()
+
+        episode_data = []
+        step = 0
+        terminated = False
+        truncated = False
+
+        max_steps = spec.max_steps
+        if self.max_steps_per_episode:
+            max_steps = min(max_steps, self.max_steps_per_episode)
+
+        while not terminated and not truncated and step < max_steps:
+            # Random action for data generation
+            action = np.random.randint(0, 7)
+
+            # Get observation before action
+            rgb_obs = backend.render()
+
+            # Execute action
+            next_obs, reward, terminated, truncated, next_state, _ = backend.step(action)
+
+            # Determine tier/env name for text observation
+            tier_name = f"tier{spec.difficulty_tier}"
+            env_names = list(MiniGridDefinitions.DESCRIPTIONS.get(tier_name, {}).keys())
+            text_obs = env_names[0] if env_names else "navigate to the goal"
+
+            # Store step data
+            step_data = {
+                "text_observation": text_obs,
+                "image_observation": rgb_obs.astype(np.uint8),
+                "action": np.array([action], dtype=np.int64),
+                "reward": reward,
+                "is_last": terminated or truncated,
+                "mission": mission,
+                "task_id": spec.task_id,
+                "tier": spec.difficulty_tier,
+                "agent_position": list(state.agent_position),
+                "agent_direction": state.agent_direction,
+            }
+
+            episode_data.append(step_data)
+            obs = next_obs
+            state = next_state
+            step += 1
+
+        backend.close()
+
+        # Cache the episode
+        self._episodes_cache[task_idx] = episode_data
+
+        # Update action stats
+        if self._action_stats is None and episode_data:
+            self._action_stats = {
+                "size": episode_data[0]["action"].shape,
+                "min": 0,
+                "max": 6,
+                "mean": 3.0,
+            }
+
+        return episode_data
+
+    @property
+    def action_stats(self):
+        """Get action space statistics."""
+        if self._action_stats is None:
+            self._action_stats = {
+                "size": (1,),  # Single discrete action
+                "min": 0,
+                "max": 6,
+                "mean": 3.0,
+            }
+        return self._action_stats
+
+    def __len__(self) -> int:
+        if self.by_episode:
+            return len(self.task_files)
+        return len(self._step_index)
+
+    def __getitem__(self, idx: int) -> Dict[str, Any]:
+        if self.by_episode:
+            # Return full episode
+            episode = self._generate_episode(idx)
+            return self._process_episode(episode)
+        else:
+            # Return single step
+            task_idx, step_idx = self._step_index[idx]
+            episode = self._generate_episode(task_idx)
+            if step_idx < len(episode):
+                return episode[step_idx]
+            else:
+                # Return last step if index is beyond episode length
+                return episode[-1]
+
+    def _process_episode(self, episode: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """
+        Process episode into batched format.
+
+        Args:
+            episode: List of step dictionaries
+
+        Returns:
+            Dictionary with lists of values per key
+        """
+        result = defaultdict(list)
+        for step in episode:
+            for key, value in step.items():
+                result[key].append(value)
+        return dict(result)
+
+
+class MiniGridPrecomputedDataset(Dataset):
+    """
+    Dataset for pre-generated MiniGrid observations.
+
+    Uses saved numpy arrays and metadata instead of running episodes live.
+    """
+
+    def __init__(
+        self,
+        data_dir: str,
+        dataset_name: str = "minigrid",
+        by_episode: bool = False,
+    ):
+        """
+        Initialize from pre-computed data directory.
+
+        Args:
+            data_dir: Directory containing observation files and metadata
+            dataset_name: Name for this dataset
+            by_episode: If True, group by episode
+        """
+        self.data_dir = Path(data_dir)
+        self.dataset_name = dataset_name
+        self.by_episode = by_episode
+
+        # Load metadata
+        metadata_path = self.data_dir / "metadata.json"
+        if metadata_path.exists():
+            with open(metadata_path, "r") as f:
+                self.metadata = json.load(f)
+        else:
+            self.metadata = {"samples": []}
+
+        self.samples = self.metadata.get("samples", [])
+        self._action_stats = {
+            "size": (1,),
+            "min": 0,
+            "max": 6,
+            "mean": 3.0,
+        }
+
+    @property
+    def action_stats(self):
+        return self._action_stats
+
+    def __len__(self) -> int:
+        return len(self.samples)
+
+    def __getitem__(self, idx: int) -> Dict[str, Any]:
+        sample = self.samples[idx]
+
+        # Load observation image
+        img_path = self.data_dir / sample.get("image_path", f"obs_{idx}.npy")
+        if img_path.exists():
+            image_obs = np.load(img_path)
+        else:
+            image_obs = np.zeros((64, 64, 3), dtype=np.uint8)
+
+        return {
+            "text_observation": sample.get("mission", "navigate to the goal"),
+            "image_observation": image_obs,
+            "action": np.array([sample.get("action", 0)], dtype=np.int64),
+            "reward": sample.get("reward", 0.0),
+            "is_last": sample.get("is_last", False),
+            "task_id": sample.get("task_id", "unknown"),
+            "tier": sample.get("tier", 1),
+        }
+
+
+def custom_collate(batch: List[Dict[str, Any]]) -> Dict[str, List[Any]]:
+    """Custom collate function for DataLoader."""
+    result = defaultdict(list)
+    for item in batch:
+        for key, value in item.items():
+            result[key].append(value)
+    return dict(result)
+
+
+def get_minigrid_dataloader(
+    task_files: List[str],
+    batch_size: int,
+    dataset_name: str = "minigrid",
+    num_workers: int = 0,
+    by_episode: bool = False,
+) -> tuple:
+    """
+    Create MiniGrid dataset and dataloader.
+
+    Args:
+        task_files: List of task JSON file paths
+        batch_size: Batch size
+        dataset_name: Dataset name
+        num_workers: Number of data loading workers
+        by_episode: Whether to load by episode
+
+    Returns:
+        Tuple of (dataset, dataloader)
+    """
+    dataset = MiniGridDataset(
+        task_files=task_files,
+        dataset_name=dataset_name,
+        by_episode=by_episode,
+    )
+
+    dataloader = DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        num_workers=num_workers,
+        collate_fn=custom_collate,
+    )
+
+    return dataset, dataloader
+
+
+def get_minigrid_precomputed_dataloader(
+    data_dir: str,
+    batch_size: int,
+    dataset_name: str = "minigrid",
+    num_workers: int = 0,
+) -> tuple:
+    """
+    Create dataloader from pre-computed observations.
+
+    Args:
+        data_dir: Directory with saved observations
+        batch_size: Batch size
+        dataset_name: Dataset name
+        num_workers: Number of workers
+
+    Returns:
+        Tuple of (dataset, dataloader)
+    """
+    dataset = MiniGridPrecomputedDataset(
+        data_dir=data_dir,
+        dataset_name=dataset_name,
+    )
+
+    dataloader = DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=False,
+        num_workers=num_workers,
+        collate_fn=custom_collate,
+    )
+
+    return dataset, dataloader
diff --git a/src/modules/dataset_modules/minigrid_module.py b/src/modules/dataset_modules/minigrid_module.py
new file mode 100644
index 00000000..dcd4311b
--- /dev/null
+++ b/src/modules/dataset_modules/minigrid_module.py
@@ -0,0 +1,376 @@
+"""
+MiniGrid Dataset Module for GenESIS Evaluation
+
+Provides MiniGridModule and MiniGridBatchModule following the DatasetModule pattern.
+"""
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, List, Optional
+import json
+import glob
+import numpy as np
+import os
+import sys
+
+# Add paths for imports
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from src.modules.dataset_modules.base_dataset_module import DatasetModule, DatasetBatchModule, BatchInfo
+from definitions.minigrid import MiniGridDefinitions
+from definitions.minigrid_prompt import format_instruction_prompt
+from src.data_utils.minigrid_dataloader import get_minigrid_dataloader
+
+
+class MiniGridModule(DatasetModule):
+    """
+    MiniGrid dataset module for VLM evaluation.
+
+    Follows the same pattern as other DatasetModules in the GenESIS framework.
+    """
+
+    def __init__(
+        self,
+        disk_root_dir: str,
+        modality: str = "vlm",
+        source: str = "openai",
+        model: str = "gpt-4o",
+        dataset_name: str = "minigrid",
+        batch_size: int = 1,
+        k_shots: int = 0,
+        tier: Optional[int] = None,
+    ):
+        """
+        Initialize the MiniGrid module.
+
+        Args:
+            disk_root_dir: Root directory containing task files
+            modality: Modality type (only "vlm" supported)
+            source: Model source (e.g., "openai")
+            model: Model name
+            dataset_name: Dataset name (e.g., "tier1", "tier2", etc.)
+            batch_size: Batch size for evaluation
+            k_shots: Number of few-shot examples
+            tier: Optional tier filter (1-5)
+        """
+        super().__init__(
+            disk_root_dir=disk_root_dir,
+            modality=modality,
+            source=source,
+            model=model,
+            dataset_name=dataset_name,
+            batch_size=batch_size,
+            k_shots=k_shots,
+        )
+
+        self._definitions_class = MiniGridDefinitions
+        self.dataset_family = "minigrid"
+        self.format_instruction_prompt_fn = format_instruction_prompt
+        self.get_dataloader_fn = get_minigrid_dataloader
+        self.tier = tier
+
+    def _find_shards(self, dataset: str) -> List[str]:
+        """
+        Find task files for the given dataset.
+
+        Args:
+            dataset: Dataset name (e.g., "tier1", "minigrid")
+
+        Returns:
+            List of task file paths
+        """
+        # Look for task files in the expected locations
+        search_patterns = [
+            f"{self.disk_root_dir}/**/{dataset}*.json",
+            f"{self.disk_root_dir}/**/tier*/*.json",
+            f"{self.disk_root_dir}/**/*.json",
+        ]
+
+        task_files = []
+        for pattern in search_patterns:
+            found = glob.glob(pattern, recursive=True)
+            task_files.extend(found)
+
+        # Remove duplicates and filter by tier if specified
+        task_files = list(set(task_files))
+
+        if self.tier is not None:
+            task_files = [
+                f for f in task_files
+                if f"tier{self.tier}" in f or self._task_has_tier(f, self.tier)
+            ]
+
+        return sorted(task_files)
+
+    def _task_has_tier(self, path: str, tier: int) -> bool:
+        """Check if a task file has the specified tier."""
+        try:
+            with open(path, "r") as f:
+                data = json.load(f)
+            if "TaskSpecification" in data:
+                data = data["TaskSpecification"]
+            return data.get("difficulty_tier", 0) == tier
+        except Exception:
+            return False
+
+    def _run_eval_dataset(self, dataset: str) -> dict:
+        """
+        Run evaluation on a dataset.
+
+        Args:
+            dataset: Dataset name
+
+        Returns:
+            Dictionary of evaluation results
+        """
+        task_files = self._find_shards(dataset)
+        if len(task_files) == 0:
+            return {"error": f"No task files found for dataset {dataset}"}
+
+        # Create dataloader
+        dataloader_obj, dataloader = self.get_dataloader_fn(
+            task_files,
+            batch_size=self.batch_size,
+            dataset_name=dataset,
+            by_episode=True,
+        )
+
+        # Initialize metrics
+        total_samples = 0
+        correct_predictions = 0
+        all_predictions = []
+        all_labels = []
+
+        for episode_batch in dataloader:
+            # Process batch through the module
+            for batch_data in self._process_batch(episode_batch, dataset):
+                cur_inputs, _, instructions, labels, idxs, output_types, is_lasts = batch_data
+
+                # Get predictions from modality module
+                predictions = self.modality_module.get_predictions(
+                    cur_inputs, instructions
+                )
+
+                # Evaluate predictions
+                for pred, label in zip(predictions, labels):
+                    total_samples += 1
+                    all_predictions.append(pred)
+                    all_labels.append(label)
+
+                    # Check correctness (exact match for discrete actions)
+                    if self._check_prediction(pred, label):
+                        correct_predictions += 1
+
+            if self.action_stats is None:
+                self.action_stats = dataloader_obj.action_stats
+
+        # Compute metrics
+        accuracy = correct_predictions / max(total_samples, 1)
+
+        return {
+            "accuracy": accuracy,
+            "exact_match_rate": accuracy,
+            "total_samples": total_samples,
+            "correct_predictions": correct_predictions,
+            "predictions": all_predictions,
+            "labels": [l.tolist() if hasattr(l, 'tolist') else l for l in all_labels],
+        }
+
+    def _check_prediction(self, prediction: Any, label: Any) -> bool:
+        """
+        Check if prediction matches label.
+
+        Args:
+            prediction: Model prediction
+            label: Ground truth label
+
+        Returns:
+            Whether prediction is correct
+        """
+        try:
+            # Handle various prediction formats
+            if isinstance(prediction, list):
+                pred_action = prediction[0] if prediction else -1
+            elif isinstance(prediction, dict):
+                # Handle probability distribution
+                pred_action = max(prediction, key=prediction.get)
+            else:
+                pred_action = prediction
+
+            # Handle label formats
+            if isinstance(label, np.ndarray):
+                true_action = label[0] if label.size > 0 else -1
+            elif isinstance(label, list):
+                true_action = label[0] if label else -1
+            else:
+                true_action = label
+
+            return int(pred_action) == int(true_action)
+        except Exception:
+            return False
+
+
+class MiniGridBatchModule(DatasetBatchModule):
+    """
+    MiniGrid batch module for OpenAI batch API evaluation.
+
+    Supports sending batch jobs and processing results.
+    """
+
+    def __init__(
+        self,
+        disk_root_dir: str,
+        modality: str = "vlm",
+        source: str = "openai",
+        model: str = "gpt-4o",
+        batch_info_dir: str = "./batch_info",
+        batch_size: int = 1,
+        k_shots: int = 0,
+        tier: Optional[int] = None,
+    ):
+        """
+        Initialize the MiniGrid batch module.
+
+        Args:
+            disk_root_dir: Root directory containing task files
+            modality: Modality type
+            source: Model source
+            model: Model name
+            batch_info_dir: Directory for batch info files
+            batch_size: Batch size
+            k_shots: Number of few-shot examples
+            tier: Optional tier filter
+        """
+        super().__init__(
+            disk_root_dir=disk_root_dir,
+            modality=modality,
+            source=source,
+            model=model,
+            batch_info_dir=batch_info_dir,
+            batch_size=batch_size,
+            k_shots=k_shots,
+        )
+
+        self._definitions_class = MiniGridDefinitions
+        self.dataset_family = "minigrid"
+        self.format_instruction_prompt_fn = format_instruction_prompt
+        self.get_dataloader_fn = get_minigrid_dataloader
+        self.tier = tier
+
+    @property
+    def datasets(self):
+        """Get list of available datasets."""
+        if len(self._datasets) == 0:
+            # Default datasets by tier
+            self._datasets = [
+                "tier1", "tier2", "tier3", "tier4", "tier5"
+            ]
+            if self.tier is not None:
+                self._datasets = [f"tier{self.tier}"]
+        return self._datasets
+
+    def _find_shards(self, dataset: str) -> List[str]:
+        """Find task files for the given dataset."""
+        search_patterns = [
+            f"{self.disk_root_dir}/**/{dataset}/*.json",
+            f"{self.disk_root_dir}/{dataset}/**/*.json",
+            f"{self.disk_root_dir}/**/*.json",
+        ]
+
+        task_files = []
+        for pattern in search_patterns:
+            found = glob.glob(pattern, recursive=True)
+            task_files.extend(found)
+
+        task_files = list(set(task_files))
+
+        # Filter by tier in filename or content
+        if dataset.startswith("tier"):
+            tier_num = int(dataset.replace("tier", ""))
+            task_files = [
+                f for f in task_files
+                if f"tier{tier_num}" in f or self._task_has_tier(f, tier_num)
+            ]
+
+        return sorted(task_files)
+
+    def _task_has_tier(self, path: str, tier: int) -> bool:
+        """Check if a task file has the specified tier."""
+        try:
+            with open(path, "r") as f:
+                data = json.load(f)
+            if "TaskSpecification" in data:
+                data = data["TaskSpecification"]
+            return data.get("difficulty_tier", 0) == tier
+        except Exception:
+            return False
+
+    def _run_eval_dataset(self, batch_info_files: List[str]) -> dict:
+        """
+        Process batch results for evaluation.
+
+        Args:
+            batch_info_files: List of batch info file paths
+
+        Returns:
+            Dictionary of evaluation results
+        """
+        total_samples = 0
+        correct_predictions = 0
+        all_predictions = []
+        all_labels = []
+
+        for batch_file in batch_info_files:
+            # Load batch info
+            batch_data = np.load(batch_file, allow_pickle=True)
+
+            batch_id = str(batch_data["batch_id"])
+            labels = batch_data["labels"]
+            output_types = batch_data["output_types"]
+
+            # Get predictions from modality module
+            predictions = self.modality_module.get_batch_results(batch_id)
+
+            if predictions is None:
+                continue
+
+            # Evaluate predictions
+            for pred, label in zip(predictions, labels):
+                total_samples += 1
+                all_predictions.append(pred)
+                all_labels.append(label)
+
+                if self._check_prediction(pred, label):
+                    correct_predictions += 1
+
+        accuracy = correct_predictions / max(total_samples, 1)
+
+        return {
+            "accuracy": accuracy,
+            "exact_match_rate": accuracy,
+            "total_samples": total_samples,
+            "correct_predictions": correct_predictions,
+            "predictions": all_predictions,
+            "labels": [l.tolist() if hasattr(l, 'tolist') else l for l in all_labels],
+        }
+
+    def _check_prediction(self, prediction: Any, label: Any) -> bool:
+        """Check if prediction matches label."""
+        try:
+            if isinstance(prediction, list):
+                pred_action = prediction[0] if prediction else -1
+            elif isinstance(prediction, dict):
+                pred_action = max(prediction, key=prediction.get)
+            else:
+                pred_action = prediction
+
+            if isinstance(label, np.ndarray):
+                true_action = label[0] if label.size > 0 else -1
+            elif isinstance(label, list):
+                true_action = label[0] if label else -1
+            else:
+                true_action = label
+
+            return int(pred_action) == int(true_action)
+        except Exception:
+            return False
diff --git a/src/v1_1/adapters/__init__.py b/src/v1_1/adapters/__init__.py
new file mode 100644
index 00000000..2d54f34a
--- /dev/null
+++ b/src/v1_1/adapters/__init__.py
@@ -0,0 +1 @@
+"""Model adapters for MultiNet v1.1 evaluation."""
diff --git a/src/v1_1/adapters/lmstudio_vlm_adapter.py b/src/v1_1/adapters/lmstudio_vlm_adapter.py
new file mode 100644
index 00000000..0b41a320
--- /dev/null
+++ b/src/v1_1/adapters/lmstudio_vlm_adapter.py
@@ -0,0 +1,143 @@
+"""
+LMStudio VLM Adapter for MultiNet v1.1
+
+Uses the OpenAI-compatible chat/completions endpoint provided by LMStudio.
+Also works with any OpenAI-compatible vision API.
+
+Usage:
+    adapter = LMStudioVLMAdapter(model="qwen2.5-vl-7b")
+    output = adapter.predict(model_input)
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import json
+import re
+import urllib.request
+import urllib.error
+
+import numpy as np
+from PIL import Image
+
+from ..model_interface import ModelInterface, ModelInput, ModelOutput
+
+
+class LMStudioVLMAdapter(ModelInterface):
+    """
+    Model adapter using the OpenAI-compatible API (LMStudio, vLLM, etc.).
+
+    Sends image via data URL in chat completions format.
+    """
+
+    def __init__(
+        self,
+        model: str = "qwen2.5-vl-7b",
+        base_url: str = "http://localhost:1234",
+        temperature: float = 0.0,
+        max_tokens: int = 256,
+    ):
+        self.model = model
+        self.base_url = base_url.rstrip("/")
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+
+    @property
+    def model_name(self) -> str:
+        return f"lmstudio_{self.model}"
+
+    def predict(self, input: ModelInput) -> ModelOutput:
+        # Encode image as base64 data URL
+        img = Image.fromarray(input.image)
+        buf = io.BytesIO()
+        img.save(buf, format="PNG")
+        img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+        data_url = f"data:image/png;base64,{img_b64}"
+
+        # Build prompt
+        action_lines = "\n".join(
+            f"  {aid}: {aname}" for aid, aname in sorted(input.action_space.items())
+        )
+        text_prompt = (
+            f"You are controlling an agent in a gridworld environment.\n"
+            f"Mission: {input.text_prompt}\n"
+            f"Step: {input.step_number}/{input.max_steps}\n\n"
+            f"Available actions:\n{action_lines}\n\n"
+            f"Look at the image showing the current state of the environment. "
+            f"The agent is the blue triangle. The goal is the green square.\n\n"
+            f"Choose the best action to accomplish the mission. "
+            f"Respond with ONLY the action number (0-6) on the first line."
+        )
+
+        # OpenAI-compatible chat completions payload
+        payload = {
+            "model": self.model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": data_url}},
+                        {"type": "text", "text": text_prompt},
+                    ],
+                }
+            ],
+            "temperature": self.temperature,
+            "max_tokens": self.max_tokens,
+        }
+
+        try:
+            req = urllib.request.Request(
+                f"{self.base_url}/v1/chat/completions",
+                data=json.dumps(payload).encode("utf-8"),
+                headers={"Content-Type": "application/json"},
+                method="POST",
+            )
+            with urllib.request.urlopen(req, timeout=120) as resp:
+                result = json.loads(resp.read().decode("utf-8"))
+
+            raw_output = result["choices"][0]["message"]["content"]
+            action, confidence, reasoning = self._parse_response(raw_output, input.action_space)
+
+            return ModelOutput(
+                action=action,
+                confidence=confidence,
+                reasoning=reasoning,
+                raw_output=raw_output,
+            )
+
+        except (urllib.error.URLError, urllib.error.HTTPError, ConnectionError, KeyError) as e:
+            return ModelOutput(
+                action=6,
+                confidence=0.0,
+                reasoning=f"API error: {e}",
+                raw_output=str(e),
+            )
+
+    def _parse_response(
+        self, text: str, action_space: dict[int, str]
+    ) -> tuple[int, float | None, str | None]:
+        """Parse action from model response text."""
+        valid_actions = set(action_space.keys())
+        text = text.strip()
+
+        first_line = text.split("\n")[0].strip()
+        match = re.search(r"\b([0-6])\b", first_line)
+        if match:
+            action = int(match.group(1))
+            if action in valid_actions:
+                reasoning = text[match.end():].strip() or None
+                return action, None, reasoning
+
+        matches = re.findall(r"\b([0-6])\b", text)
+        if matches:
+            action = int(matches[0])
+            if action in valid_actions:
+                return action, None, text
+
+        text_lower = text.lower()
+        for aid, aname in action_space.items():
+            if aname.lower() in text_lower:
+                return aid, None, text
+
+        return 6, 0.0, f"Could not parse action from: {text[:200]}"
diff --git a/src/v1_1/adapters/ollama_vlm_adapter.py b/src/v1_1/adapters/ollama_vlm_adapter.py
new file mode 100644
index 00000000..8e528966
--- /dev/null
+++ b/src/v1_1/adapters/ollama_vlm_adapter.py
@@ -0,0 +1,149 @@
+"""
+Ollama VLM Adapter for MultiNet v1.1
+
+Connects to a local Ollama server to use open-source VLMs for MiniGrid evaluation.
+Recommended model: qwen2.5vl:7b (best accuracy in the 7B VLM class).
+Fallback options: llava:7b, llava:13b, minicpm-v.
+
+Usage:
+    adapter = OllamaVLMAdapter(model="qwen2.5vl:7b")
+    output = adapter.predict(model_input)
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import json
+import re
+import urllib.request
+import urllib.error
+
+import numpy as np
+from PIL import Image
+
+from ..model_interface import ModelInterface, ModelInput, ModelOutput
+
+
+class OllamaVLMAdapter(ModelInterface):
+    """
+    Model adapter that connects to a local Ollama server for VLM inference.
+
+    Sends image as base64 + text prompt, receives generated text, parses action.
+    Works with any Ollama vision model (qwen2.5vl, llava, minicpm-v, etc.).
+    """
+
+    def __init__(
+        self,
+        model: str = "qwen2.5vl:7b",
+        base_url: str = "http://localhost:11434",
+        temperature: float = 0.0,
+        max_tokens: int = 256,
+    ):
+        self.model = model
+        self.base_url = base_url.rstrip("/")
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+
+    @property
+    def model_name(self) -> str:
+        return f"ollama_{self.model}"
+
+    def predict(self, input: ModelInput) -> ModelOutput:
+        # Encode image as base64
+        img = Image.fromarray(input.image)
+        buf = io.BytesIO()
+        img.save(buf, format="PNG")
+        img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+
+        # Build prompt with action space description
+        action_lines = "\n".join(
+            f"  {aid}: {aname}" for aid, aname in sorted(input.action_space.items())
+        )
+        prompt = (
+            f"You are controlling an agent in a gridworld environment.\n"
+            f"Mission: {input.text_prompt}\n"
+            f"Step: {input.step_number}/{input.max_steps}\n\n"
+            f"Available actions:\n{action_lines}\n\n"
+            f"Look at the image showing the current state of the environment. "
+            f"The agent is the blue triangle. The goal is the green square.\n\n"
+            f"Choose the best action to accomplish the mission. "
+            f"Respond with ONLY the action number (0-6) on the first line, "
+            f"then optionally explain your reasoning."
+        )
+
+        if input.additional_context:
+            prompt += f"\n\nAdditional context: {input.additional_context}"
+
+        # Call Ollama API
+        payload = {
+            "model": self.model,
+            "prompt": prompt,
+            "images": [img_b64],
+            "stream": False,
+            "options": {
+                "temperature": self.temperature,
+                "num_predict": self.max_tokens,
+            },
+        }
+
+        try:
+            req = urllib.request.Request(
+                f"{self.base_url}/api/generate",
+                data=json.dumps(payload).encode("utf-8"),
+                headers={"Content-Type": "application/json"},
+                method="POST",
+            )
+            with urllib.request.urlopen(req, timeout=120) as resp:
+                result = json.loads(resp.read().decode("utf-8"))
+
+            raw_output = result.get("response", "")
+            action, confidence, reasoning = self._parse_response(raw_output, input.action_space)
+
+            return ModelOutput(
+                action=action,
+                confidence=confidence,
+                reasoning=reasoning,
+                raw_output=raw_output,
+            )
+
+        except (urllib.error.URLError, urllib.error.HTTPError, ConnectionError) as e:
+            # Fallback to wait action if API is unreachable
+            return ModelOutput(
+                action=6,
+                confidence=0.0,
+                reasoning=f"API error: {e}",
+                raw_output=str(e),
+            )
+
+    def _parse_response(
+        self, text: str, action_space: dict[int, str]
+    ) -> tuple[int, float | None, str | None]:
+        """Parse action from model response text."""
+        valid_actions = set(action_space.keys())
+        text = text.strip()
+
+        # Try to find a bare integer on the first line
+        first_line = text.split("\n")[0].strip()
+        match = re.search(r"\b([0-6])\b", first_line)
+        if match:
+            action = int(match.group(1))
+            if action in valid_actions:
+                reasoning = text[match.end():].strip() or None
+                return action, None, reasoning
+
+        # Try to find any integer in the full text
+        matches = re.findall(r"\b([0-6])\b", text)
+        if matches:
+            action = int(matches[0])
+            if action in valid_actions:
+                return action, None, text
+
+        # Try matching action names
+        text_lower = text.lower()
+        for aid, aname in action_space.items():
+            if aname.lower() in text_lower:
+                return aid, None, text
+
+        # Fallback: wait
+        return 6, 0.0, f"Could not parse action from: {text[:200]}"
diff --git a/src/v1_1/adapters/paligemma_adapter.py b/src/v1_1/adapters/paligemma_adapter.py
new file mode 100644
index 00000000..e88d3763
--- /dev/null
+++ b/src/v1_1/adapters/paligemma_adapter.py
@@ -0,0 +1,137 @@
+"""
+PaliGemma Adapter for MultiNet v1.1
+
+Uses Google's PaliGemma VLM for MiniGrid evaluation.
+Lighter weight than Pi0/Magma, good for quick iteration.
+
+Usage:
+    adapter = PaliGemmaMiniGridAdapter()
+    adapter.setup(device="cuda:0")
+    output = adapter.predict(model_input)
+"""
+
+from __future__ import annotations
+
+import re
+import numpy as np
+from PIL import Image
+
+from ..model_interface import ModelInterface, ModelInput, ModelOutput
+
+
+class PaliGemmaMiniGridAdapter(ModelInterface):
+    """
+    PaliGemma VLM adapter for MiniGrid evaluation.
+
+    Uses google/paligemma2-3b-pt-896 or google/paligemma-3b-mix-448
+    via the transformers library.
+    """
+
+    def __init__(self, model_id: str = "google/paligemma2-3b-pt-896"):
+        self.model_id = model_id
+        self.model = None
+        self.processor = None
+        self.device = "cpu"
+
+    @property
+    def model_name(self) -> str:
+        return f"paligemma_{self.model_id.split('/')[-1]}"
+
+    def setup(self, device: str = "cpu") -> None:
+        import torch
+        from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
+
+        self.device = device
+        dtype = torch.bfloat16 if "cuda" in device else torch.float32
+
+        self.processor = AutoProcessor.from_pretrained(self.model_id)
+        self.model = PaliGemmaForConditionalGeneration.from_pretrained(
+            self.model_id,
+            torch_dtype=dtype,
+        ).to(device)
+        self.model.eval()
+
+    def predict(self, input: ModelInput) -> ModelOutput:
+        import torch
+
+        if self.model is None or self.processor is None:
+            raise RuntimeError("Call setup() before predict()")
+
+        # Convert observation to PIL image
+        img = Image.fromarray(input.image).convert("RGB")
+
+        # Build prompt
+        action_lines = ", ".join(
+            f"{aid}={aname}" for aid, aname in sorted(input.action_space.items())
+        )
+        prompt = (
+            f"This is a gridworld navigation task. {input.text_prompt} "
+            f"Actions: {action_lines}. "
+            f"The blue triangle is the agent, green square is the goal. "
+            f"Output the best action number (0-6):"
+        )
+
+        # Process and generate
+        inputs = self.processor(
+            text=prompt,
+            images=img,
+            return_tensors="pt",
+        ).to(self.device)
+
+        with torch.no_grad():
+            output_ids = self.model.generate(
+                **inputs,
+                max_new_tokens=32,
+                do_sample=False,
+            )
+
+        # Decode only the generated tokens (skip input)
+        input_len = inputs["input_ids"].shape[-1]
+        raw_output = self.processor.decode(
+            output_ids[0][input_len:], skip_special_tokens=True
+        )
+
+        # Parse action
+        action, confidence, reasoning = self._parse_response(raw_output, input.action_space)
+
+        return ModelOutput(
+            action=action,
+            confidence=confidence,
+            reasoning=reasoning,
+            raw_output=raw_output,
+        )
+
+    def _parse_response(
+        self, text: str, action_space: dict[int, str]
+    ) -> tuple[int, float | None, str | None]:
+        """Parse action from model response."""
+        valid_actions = set(action_space.keys())
+        text = text.strip()
+
+        match = re.search(r"\b([0-6])\b", text)
+        if match:
+            action = int(match.group(1))
+            if action in valid_actions:
+                return action, None, text
+
+        text_lower = text.lower()
+        for aid, aname in action_space.items():
+            if aname.lower() in text_lower:
+                return aid, None, text
+
+        return 6, 0.0, f"Could not parse: {text[:100]}"
+
+    def teardown(self) -> None:
+        if self.model is not None:
+            del self.model
+            self.model = None
+        if self.processor is not None:
+            del self.processor
+            self.processor = None
+        # Free GPU memory
+        try:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        except ImportError:
+            pass
diff --git a/src/v1_1/cross_domain/__init__.py b/src/v1_1/cross_domain/__init__.py
new file mode 100644
index 00000000..0930c170
--- /dev/null
+++ b/src/v1_1/cross_domain/__init__.py
@@ -0,0 +1,17 @@
+"""
+Cross-Domain Interface for MultiNet v1.1
+
+Provides canonical task specification and domain adapter abstractions
+for evaluating models across different action domains (GridWorld, Physics, NL, GUI).
+"""
+
+from .canonical_task_spec import CanonicalTaskSpec, CanonicalGoal, CanonicalObject
+from .domain_adapter import DomainAdapter, GUIAction
+
+__all__ = [
+    "CanonicalTaskSpec",
+    "CanonicalGoal",
+    "CanonicalObject",
+    "DomainAdapter",
+    "GUIAction",
+]
diff --git a/src/v1_1/cross_domain/canonical_task_spec.py b/src/v1_1/cross_domain/canonical_task_spec.py
new file mode 100644
index 00000000..39f18c97
--- /dev/null
+++ b/src/v1_1/cross_domain/canonical_task_spec.py
@@ -0,0 +1,120 @@
+"""
+Canonical Task Specification
+
+Domain-agnostic representation of tasks that can be mapped to any domain
+(GridWorld, Physics, NL, GUI). Uses normalized [0,1] coordinates for
+cross-domain compatibility.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import Optional
+
+
+@dataclass
+class CanonicalGoal:
+    """Domain-agnostic goal specification."""
+    goal_type: str  # "reach", "collect", "arrange", "survive"
+    target: tuple[float, ...] | None = None  # Normalized position
+    target_ids: list[str] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        return {
+            "goal_type": self.goal_type,
+            "target": list(self.target) if self.target else None,
+            "target_ids": self.target_ids,
+        }
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "CanonicalGoal":
+        return cls(
+            goal_type=d["goal_type"],
+            target=tuple(d["target"]) if d.get("target") else None,
+            target_ids=d.get("target_ids", []),
+        )
+
+
+@dataclass
+class CanonicalObject:
+    """Domain-agnostic object specification."""
+    id: str
+    obj_type: str   # "barrier", "collectible", "interactive", "hazard", "portal"
+    position: tuple[float, ...]  # Normalized [0,1] coordinates
+    properties: dict = field(default_factory=dict)
+
+    def to_dict(self) -> dict:
+        return {
+            "id": self.id,
+            "obj_type": self.obj_type,
+            "position": list(self.position),
+            "properties": self.properties,
+        }
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "CanonicalObject":
+        return cls(
+            id=d["id"],
+            obj_type=d["obj_type"],
+            position=tuple(d["position"]),
+            properties=d.get("properties", {}),
+        )
+
+
+@dataclass
+class CanonicalTaskSpec:
+    """
+    Domain-agnostic task specification.
+
+    All positions are normalized to [0,1] for cross-domain compatibility.
+    Domain-specific extensions go in domain_config.
+    """
+    task_id: str
+    seed: int
+    difficulty: int  # 1-5
+    dimensions: tuple[float, ...]     # Normalized [0,1]
+    agent_start: tuple[float, ...]    # Normalized
+    goal: CanonicalGoal               # Domain-agnostic goal
+    objects: list[CanonicalObject]     # Domain-agnostic objects
+    max_steps: int
+    description: str = ""
+    domain_config: dict = field(default_factory=dict)  # Domain-specific extensions
+
+    def to_dict(self) -> dict:
+        return {
+            "task_id": self.task_id,
+            "seed": self.seed,
+            "difficulty": self.difficulty,
+            "dimensions": list(self.dimensions),
+            "agent_start": list(self.agent_start),
+            "goal": self.goal.to_dict(),
+            "objects": [obj.to_dict() for obj in self.objects],
+            "max_steps": self.max_steps,
+            "description": self.description,
+            "domain_config": self.domain_config,
+        }
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "CanonicalTaskSpec":
+        return cls(
+            task_id=d["task_id"],
+            seed=d["seed"],
+            difficulty=d["difficulty"],
+            dimensions=tuple(d["dimensions"]),
+            agent_start=tuple(d["agent_start"]),
+            goal=CanonicalGoal.from_dict(d["goal"]),
+            objects=[CanonicalObject.from_dict(o) for o in d.get("objects", [])],
+            max_steps=d["max_steps"],
+            description=d.get("description", ""),
+            domain_config=d.get("domain_config", {}),
+        )
+
+    def to_json(self, path: str) -> None:
+        with open(path, "w") as f:
+            json.dump(self.to_dict(), f, indent=2)
+
+    @classmethod
+    def from_json(cls, path: str) -> "CanonicalTaskSpec":
+        with open(path) as f:
+            return cls.from_dict(json.load(f))
diff --git a/src/v1_1/cross_domain/domain_adapter.py b/src/v1_1/cross_domain/domain_adapter.py
new file mode 100644
index 00000000..0f9a1a27
--- /dev/null
+++ b/src/v1_1/cross_domain/domain_adapter.py
@@ -0,0 +1,108 @@
+"""
+Domain Adapter Abstract Base Class
+
+Defines the interface for mapping canonical task specifications
+to domain-specific environments and back.
+"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Optional
+
+import numpy as np
+
+from .canonical_task_spec import CanonicalTaskSpec
+
+
+@dataclass
+class GUIAction:
+    """
+    Action type for Domain 4 (GUI manipulation) -- forward-looking.
+
+    Designed now to ensure the cross-domain interface supports
+    mouse/keyboard GUI interactions from the start.
+    """
+    action_type: str  # "mouse_click", "mouse_drag", "key_press"
+    x: float = 0.0
+    y: float = 0.0
+    drag_to_x: float = 0.0
+    drag_to_y: float = 0.0
+    key: str = ""     # For key_press actions
+
+
+class DomainAdapter(ABC):
+    """
+    Abstract base class for domain adapters.
+
+    Maps canonical task specs to domain-specific environments
+    and provides a Gymnasium-like interface for evaluation.
+
+    Implementations:
+    - GridWorldDomainAdapter: MiniGrid/MultiGrid gridworlds
+    - PhysicsDomainAdapter (future): Pymunk 2D physics
+    - NLDomainAdapter (future): Natural language commands
+    - GUIDomainAdapter (future): Pygame GUI manipulation
+    """
+
+    @property
+    @abstractmethod
+    def domain_name(self) -> str:
+        """Unique domain identifier."""
+        ...
+
+    @property
+    @abstractmethod
+    def action_type(self) -> str:
+        """Action type: 'discrete', 'continuous', 'text', 'gui'."""
+        ...
+
+    @abstractmethod
+    def from_canonical(self, spec: CanonicalTaskSpec) -> Any:
+        """
+        Convert canonical task spec to domain-specific environment.
+
+        Args:
+            spec: Domain-agnostic task specification
+
+        Returns:
+            Domain-specific environment or configuration
+        """
+        ...
+
+    @abstractmethod
+    def to_canonical(self, domain_spec: Any) -> CanonicalTaskSpec:
+        """
+        Convert domain-specific spec to canonical task spec.
+
+        Args:
+            domain_spec: Domain-specific task specification
+
+        Returns:
+            Canonical task specification
+        """
+        ...
+
+    @abstractmethod
+    def reset(self, seed: Optional[int] = None) -> tuple[np.ndarray, dict]:
+        """Reset the environment. Returns (observation, info)."""
+        ...
+
+    @abstractmethod
+    def step(self, action: Any) -> tuple[np.ndarray, float, bool, bool, dict]:
+        """Execute action. Returns (obs, reward, terminated, truncated, info)."""
+        ...
+
+    @abstractmethod
+    def check_success(self) -> bool:
+        """Check if the task goal has been achieved."""
+        ...
+
+    def render(self) -> Optional[np.ndarray]:
+        """Render current state as RGB array."""
+        return None
+
+    def close(self) -> None:
+        """Clean up resources."""
+        pass
diff --git a/src/v1_1/cross_domain/gridworld_adapter.py b/src/v1_1/cross_domain/gridworld_adapter.py
new file mode 100644
index 00000000..47ce9a27
--- /dev/null
+++ b/src/v1_1/cross_domain/gridworld_adapter.py
@@ -0,0 +1,330 @@
+"""
+GridWorld Domain Adapter
+
+Maps canonical task specs to MiniGrid/MultiGrid environments.
+Handles coordinate normalization between [0,1] canonical space
+and integer grid coordinates.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+
+import numpy as np
+
+from .canonical_task_spec import CanonicalTaskSpec, CanonicalGoal, CanonicalObject
+from .domain_adapter import DomainAdapter
+
+try:
+    from ..gridworld.backends.base import AbstractGridBackend, GridState
+    from ..gridworld.backends.minigrid_backend import MiniGridBackend
+    from ..gridworld.task_spec import (
+        TaskSpecification, MazeLayout, MechanismSet, Rules, GoalSpec, Position,
+        KeySpec, DoorSpec, SwitchSpec, GateSpec, BlockSpec, TeleporterSpec, HazardSpec,
+    )
+except ImportError:
+    from gridworld.backends.base import AbstractGridBackend, GridState
+    from gridworld.backends.minigrid_backend import MiniGridBackend
+    from gridworld.task_spec import (
+        TaskSpecification, MazeLayout, MechanismSet, Rules, GoalSpec, Position,
+        KeySpec, DoorSpec, SwitchSpec, GateSpec, BlockSpec, TeleporterSpec, HazardSpec,
+    )
+
+
+# Mapping from canonical object types to MiniGrid mechanism types
+CANONICAL_TO_MECHANISM = {
+    "barrier": "wall",
+    "collectible": "key",
+    "interactive": "switch",
+    "hazard": "hazard",
+    "portal": "teleporter",
+    "door": "door",
+    "gate": "gate",
+    "block": "block",
+}
+
+
+class GridWorldDomainAdapter(DomainAdapter):
+    """
+    Domain adapter for MiniGrid/MultiGrid gridworld environments.
+
+    Converts between canonical [0,1] coordinates and integer grid positions.
+    """
+
+    def __init__(
+        self,
+        backend: Optional[AbstractGridBackend] = None,
+        render_mode: str = "rgb_array",
+    ):
+        self.backend = backend or MiniGridBackend(render_mode=render_mode)
+        self._task_spec: Optional[TaskSpecification] = None
+        self._state: Optional[GridState] = None
+        self._obs: Optional[np.ndarray] = None
+
+    @property
+    def domain_name(self) -> str:
+        return "gridworld"
+
+    @property
+    def action_type(self) -> str:
+        return "discrete"
+
+    def from_canonical(self, spec: CanonicalTaskSpec) -> TaskSpecification:
+        """Convert canonical spec to MiniGrid TaskSpecification."""
+        # Determine grid dimensions from domain_config or default
+        grid_w = spec.domain_config.get("grid_width", 10)
+        grid_h = spec.domain_config.get("grid_height", 10)
+
+        def denorm(pos: tuple[float, ...]) -> Position:
+            """Convert normalized [0,1] to grid coordinates."""
+            x = max(1, min(grid_w - 2, int(pos[0] * (grid_w - 1))))
+            y = max(1, min(grid_h - 2, int(pos[1] * (grid_h - 1))))
+            return Position(x, y)
+
+        # Build mechanisms from canonical objects
+        keys = []
+        doors = []
+        switches = []
+        gates = []
+        blocks = []
+        teleporters = []
+        hazards = []
+        walls = []
+
+        for obj in spec.objects:
+            pos = denorm(obj.position)
+            props = obj.properties
+
+            if obj.obj_type == "barrier":
+                walls.append(pos)
+            elif obj.obj_type == "collectible":
+                keys.append(KeySpec(
+                    id=obj.id,
+                    position=pos,
+                    color=props.get("color", "yellow"),
+                ))
+            elif obj.obj_type == "door":
+                doors.append(DoorSpec(
+                    id=obj.id,
+                    position=pos,
+                    requires_key=props.get("requires_key", "yellow"),
+                    initial_state=props.get("initial_state", "locked"),
+                ))
+            elif obj.obj_type == "interactive" and props.get("subtype") == "gate":
+                gates.append(GateSpec(
+                    id=obj.id,
+                    position=pos,
+                    initial_state=props.get("initial_state", "closed"),
+                ))
+            elif obj.obj_type == "interactive":
+                switches.append(SwitchSpec(
+                    id=obj.id,
+                    position=pos,
+                    controls=props.get("controls", []),
+                    switch_type=props.get("switch_type", "toggle"),
+                ))
+            elif obj.obj_type == "block":
+                blocks.append(BlockSpec(
+                    id=obj.id,
+                    position=pos,
+                    color=props.get("color", "grey"),
+                ))
+            elif obj.obj_type == "hazard":
+                hazards.append(HazardSpec(
+                    id=obj.id,
+                    position=pos,
+                    hazard_type=props.get("hazard_type", "lava"),
+                ))
+            elif obj.obj_type == "portal":
+                # Portals need paired positions
+                pos_b = props.get("position_b")
+                if pos_b:
+                    teleporters.append(TeleporterSpec(
+                        id=obj.id,
+                        position_a=pos,
+                        position_b=denorm(tuple(pos_b)),
+                        bidirectional=props.get("bidirectional", True),
+                    ))
+
+        # Build goal
+        goal_target = denorm(spec.goal.target) if spec.goal.target else None
+        goal = GoalSpec(
+            goal_type={
+                "reach": "reach_position",
+                "collect": "collect_all",
+                "arrange": "push_block_to",
+                "survive": "survive_steps",
+            }.get(spec.goal.goal_type, "reach_position"),
+            target=goal_target,
+            target_ids=spec.goal.target_ids,
+        )
+
+        start = denorm(spec.agent_start)
+        goal_pos = goal_target or Position(grid_w - 2, grid_h - 2)
+
+        task_spec = TaskSpecification(
+            task_id=spec.task_id,
+            seed=spec.seed,
+            difficulty_tier=spec.difficulty,
+            maze=MazeLayout(
+                dimensions=(grid_w, grid_h),
+                walls=walls,
+                start=start,
+                goal=goal_pos,
+            ),
+            mechanisms=MechanismSet(
+                keys=keys,
+                doors=doors,
+                switches=switches,
+                gates=gates,
+                blocks=blocks,
+                teleporters=teleporters,
+                hazards=hazards,
+            ),
+            rules=Rules(),
+            goal=goal,
+            max_steps=spec.max_steps,
+            description=spec.description,
+        )
+
+        self._task_spec = task_spec
+        return task_spec
+
+    def to_canonical(self, domain_spec: TaskSpecification) -> CanonicalTaskSpec:
+        """Convert MiniGrid TaskSpecification to canonical spec."""
+        grid_w, grid_h = domain_spec.maze.dimensions
+
+        def norm(pos: Position) -> tuple[float, float]:
+            """Convert grid coordinates to normalized [0,1]."""
+            return (pos.x / (grid_w - 1), pos.y / (grid_h - 1))
+
+        objects = []
+
+        # Convert walls
+        for wall in domain_spec.maze.walls:
+            objects.append(CanonicalObject(
+                id=f"wall_{wall.x}_{wall.y}",
+                obj_type="barrier",
+                position=norm(wall),
+            ))
+
+        # Convert keys
+        for key in domain_spec.mechanisms.keys:
+            objects.append(CanonicalObject(
+                id=key.id,
+                obj_type="collectible",
+                position=norm(key.position),
+                properties={"color": key.color},
+            ))
+
+        # Convert doors
+        for door in domain_spec.mechanisms.doors:
+            objects.append(CanonicalObject(
+                id=door.id,
+                obj_type="door",
+                position=norm(door.position),
+                properties={"requires_key": door.requires_key, "initial_state": door.initial_state},
+            ))
+
+        # Convert switches
+        for switch in domain_spec.mechanisms.switches:
+            objects.append(CanonicalObject(
+                id=switch.id,
+                obj_type="interactive",
+                position=norm(switch.position),
+                properties={"controls": switch.controls, "switch_type": switch.switch_type},
+            ))
+
+        # Convert gates
+        for gate in domain_spec.mechanisms.gates:
+            objects.append(CanonicalObject(
+                id=gate.id,
+                obj_type="interactive",
+                position=norm(gate.position),
+                properties={"subtype": "gate", "initial_state": gate.initial_state},
+            ))
+
+        # Convert blocks
+        for block in domain_spec.mechanisms.blocks:
+            objects.append(CanonicalObject(
+                id=block.id,
+                obj_type="block",
+                position=norm(block.position),
+                properties={"color": block.color},
+            ))
+
+        # Convert hazards
+        for hazard in domain_spec.mechanisms.hazards:
+            objects.append(CanonicalObject(
+                id=hazard.id,
+                obj_type="hazard",
+                position=norm(hazard.position),
+                properties={"hazard_type": hazard.hazard_type},
+            ))
+
+        # Convert teleporters
+        for tp in domain_spec.mechanisms.teleporters:
+            objects.append(CanonicalObject(
+                id=tp.id,
+                obj_type="portal",
+                position=norm(tp.position_a),
+                properties={
+                    "position_b": list(norm(tp.position_b)),
+                    "bidirectional": tp.bidirectional,
+                },
+            ))
+
+        # Convert goal
+        goal_type_map = {
+            "reach_position": "reach",
+            "collect_all": "collect",
+            "push_block_to": "arrange",
+            "survive_steps": "survive",
+        }
+        canonical_goal = CanonicalGoal(
+            goal_type=goal_type_map.get(domain_spec.goal.goal_type, "reach"),
+            target=norm(domain_spec.goal.target) if domain_spec.goal.target else None,
+            target_ids=domain_spec.goal.target_ids,
+        )
+
+        return CanonicalTaskSpec(
+            task_id=domain_spec.task_id,
+            seed=domain_spec.seed,
+            difficulty=domain_spec.difficulty_tier,
+            dimensions=(1.0, 1.0),
+            agent_start=norm(domain_spec.maze.start),
+            goal=canonical_goal,
+            objects=objects,
+            max_steps=domain_spec.max_steps,
+            description=domain_spec.description,
+            domain_config={"grid_width": grid_w, "grid_height": grid_h},
+        )
+
+    def reset(self, seed: Optional[int] = None) -> tuple[np.ndarray, dict]:
+        """Reset environment."""
+        if self._task_spec is None:
+            raise RuntimeError("Call from_canonical() before reset()")
+        self.backend.configure(self._task_spec)
+        obs, state, info = self.backend.reset(seed=seed)
+        self._state = state
+        self._obs = obs
+        return obs, info
+
+    def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, dict]:
+        """Execute discrete action."""
+        obs, reward, terminated, truncated, state, info = self.backend.step(action)
+        self._state = state
+        self._obs = obs
+        return obs, reward, terminated, truncated, info
+
+    def check_success(self) -> bool:
+        """Check if goal was reached."""
+        if self._state is None:
+            return False
+        return self._state.goal_reached
+
+    def render(self) -> Optional[np.ndarray]:
+        return self.backend.render()
+
+    def close(self) -> None:
+        self.backend.close()
diff --git a/src/v1_1/docs/README.md b/src/v1_1/docs/README.md
new file mode 100644
index 00000000..b867d4eb
--- /dev/null
+++ b/src/v1_1/docs/README.md
@@ -0,0 +1,480 @@
+# MiniGrid Task Framework Documentation
+
+This directory contains comprehensive documentation for the MiniGrid task specification and evaluation framework used in MultiNet.
+
+## Quick Navigation
+
+### Core Components
+
+1. **[Task Parser](./task_parser.md)** - Transforms JSON task specifications into executable environments
+2. **[MiniGrid Backend](./minigrid_backend.md)** - Production-ready square grid backend (recommended)
+3. **[MultiGrid Backend](./multigrid_backend.md)** - Experimental backend supporting exotic tilings (hex, triangle)
+
+## Overview
+
+The MiniGrid framework provides a complete pipeline for defining, parsing, and evaluating agents on gridworld navigation and puzzle-solving tasks.
+
+```
+┌─────────────────────────────────────────────────────────┐
+│              Complete Framework Architecture             │
+└─────────────────────────────────────────────────────────┘
+
+JSON Task Specification
+    │
+    ├─ maze: dimensions, walls, start, goal
+    ├─ mechanisms: keys, doors, switches, gates, blocks, hazards
+    ├─ rules: key consumption, switch types
+    └─ goal: reach_position, collect_all, push_block_to
+    │
+    ▼
+TaskSpecification (Python object)
+    │
+    ▼
+TaskParser
+    │
+    ├─ Validate specification
+    ├─ Create CustomMiniGridEnv
+    └─ Populate grid with objects
+    │
+    ▼
+Backend (MiniGrid or MultiGrid)
+    │
+    ├─ configure(task_spec)
+    ├─ reset(seed) → observation, state
+    ├─ step(action) → observation, reward, terminated, truncated, state
+    └─ render() → RGB image
+    │
+    ▼
+Evaluation / Agent Training
+```
+
+## Getting Started
+
+### Basic Usage
+
+```python
+from gridworld.backends import MiniGridBackend
+from gridworld.task_spec import TaskSpecification
+
+# 1. Load task specification
+spec = TaskSpecification.from_json("path/to/task.json")
+
+# 2. Create and configure backend
+backend = MiniGridBackend(render_mode="rgb_array")
+backend.configure(spec)
+
+# 3. Run episode
+obs, state, info = backend.reset(seed=42)
+done = False
+
+while not done:
+    action = my_policy(obs)  # Your agent
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+    done = terminated or truncated
+
+# 4. Check results
+print(f"Success: {state.goal_reached}")
+print(f"Steps: {state.step_count}")
+```
+
+### Quick Examples
+
+#### Navigation Task
+```python
+# Simple navigation from start to goal
+from gridworld.task_parser import load_task_from_file
+
+env = load_task_from_file("tasks/tier1/navigation_8x8.json")
+obs, info = env.reset()
+# ... run episode
+```
+
+#### Key-Door Puzzle
+```python
+# Task requiring key collection and door unlocking
+spec = TaskSpecification.from_json("tasks/tier2/key_door_puzzle.json")
+backend = MiniGridBackend()
+backend.configure(spec)
+
+obs, state, info = backend.reset()
+# Agent must: find key → pickup key → unlock door → reach goal
+```
+
+#### Switch-Gate Mechanism
+```python
+# Task with remote-controlled barriers
+spec = TaskSpecification.from_json("tasks/tier3/switch_gate.json")
+backend = MiniGridBackend()
+backend.configure(spec)
+
+obs, state, info = backend.reset()
+# Agent must: find switch → toggle switch → pass through gate → reach goal
+```
+
+## Documentation Structure
+
+### Task Parser Documentation (`task_parser.md`)
+
+**Topics Covered**:
+- Architecture and design philosophy
+- Three-phase parsing (validate, create, populate)
+- Object placement order and dependencies
+- Usage examples and common patterns
+- Integration with backends
+- Performance considerations
+- Troubleshooting guide
+
+**Key Sections**:
+- Why reset() is called inside the parser
+- Object placement rules (gates before switches!)
+- Validation constraints
+- Convenience functions
+
+**Best For**: Understanding how JSON tasks become runnable environments
+
+### MiniGrid Backend Documentation (`minigrid_backend.md`)
+
+**Topics Covered**:
+- Backend abstraction layer
+- GridState extraction
+- Complete API reference
+- Action space (0-6 actions)
+- Reward structure
+- Feature support matrix
+- Performance benchmarks
+
+**Key Sections**:
+- Why we don't call env.reset() in backend.reset()
+- GridState extraction algorithm
+- Multi-seed evaluation patterns
+- Mechanism state tracking
+- Video recording
+
+**Best For**: Production evaluation setup, understanding backend interface
+
+### MultiGrid Backend Documentation (`multigrid_backend.md`)
+
+**Topics Covered**:
+- Exotic tiling support (hex, triangle)
+- Coordinate system translation (integer ↔ normalized)
+- Task specification conversion
+- Action space translation
+- Feature limitations
+- Cross-backend comparison
+
+**Key Sections**:
+- Why normalize coordinates?
+- Object type unification
+- Square vs hex vs triangle comparison
+- Known limitations and workarounds
+- Future enhancements
+
+**Best For**: Research on spatial topology, exotic grid experiments
+
+## Task Specification Format
+
+Tasks are defined in JSON format with the following structure:
+
+```json
+{
+  "task_id": "unique_identifier",
+  "seed": 42,
+  "difficulty_tier": 2,
+  "max_steps": 100,
+  "description": "Human-readable description",
+
+  "maze": {
+    "dimensions": [8, 8],
+    "start": [1, 1],
+    "goal": [6, 6],
+    "walls": [[3, 3], [3, 4], [4, 3]]
+  },
+
+  "mechanisms": {
+    "keys": [
+      {"id": "key1", "position": [2, 2], "color": "red"}
+    ],
+    "doors": [
+      {"id": "door1", "position": [4, 4],
+       "requires_key": "red", "initial_state": "locked"}
+    ],
+    "switches": [
+      {"id": "sw1", "position": [2, 5],
+       "controls": ["gate1"], "switch_type": "toggle"}
+    ],
+    "gates": [
+      {"id": "gate1", "position": [5, 5], "initial_state": "closed"}
+    ],
+    "blocks": [
+      {"id": "block1", "position": [3, 5], "color": "grey"}
+    ],
+    "hazards": [
+      {"id": "lava1", "position": [4, 6], "hazard_type": "lava"}
+    ]
+  },
+
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle"
+  },
+
+  "goal": {
+    "type": "reach_position",
+    "target": [6, 6]
+  }
+}
+```
+
+See individual documentation files for detailed schema definitions.
+
+## Difficulty Tiers
+
+Tasks are organized into 5 difficulty tiers based on complexity:
+
+| Tier | Name | Features | Example |
+|------|------|----------|---------|
+| 1 | Navigation | Basic pathfinding | Empty maze, shortest path |
+| 2 | Linear Dependencies | Sequential tasks | Collect key → unlock door → reach goal |
+| 3 | Multi-Mechanism | Parallel mechanisms | Multiple keys, switches, gates |
+| 4 | Irreversibility | One-way actions | One-shot switches, consumed keys |
+| 5 | Hidden Information | Partial observability | Hidden keys, memory requirements |
+
+## Backend Comparison
+
+| Feature | MiniGrid Backend | MultiGrid Backend |
+|---------|------------------|-------------------|
+| **Status** | Production-ready | Experimental |
+| **Tilings** | Square only | Square, hex, triangle |
+| **Performance** | Fast (~400ms/episode) | Slower (~600-900ms/episode) |
+| **Mechanisms** | Full support | Limited (keys/walls only) |
+| **Rendering** | High quality | Experimental |
+| **Partial Obs** | Supported | Not yet |
+| **Use Case** | Standard evaluation | Research on exotic tilings |
+
+**Recommendation**: Use **MiniGrid Backend** for production evaluation. Use **MultiGrid Backend** only for research requiring non-square tilings.
+
+## Common Patterns
+
+### Pattern 1: Multi-Seed Evaluation
+
+```python
+def evaluate_with_seeds(backend, task_spec, num_seeds=10):
+    backend.configure(task_spec)
+    results = []
+
+    for seed in range(num_seeds):
+        obs, state, info = backend.reset(seed=seed)
+        # ... run episode
+        results.append({"seed": seed, "success": state.goal_reached})
+
+    return results
+```
+
+### Pattern 2: Task Suite Evaluation
+
+```python
+def evaluate_task_suite(backend, task_dir):
+    results = {}
+
+    for task_file in Path(task_dir).glob("*.json"):
+        spec = TaskSpecification.from_json(task_file)
+        backend.configure(spec)
+        # ... run evaluation
+        results[spec.task_id] = metrics
+
+    return results
+```
+
+### Pattern 3: Observation Collection
+
+```python
+def collect_dataset(backend, task_spec, num_episodes=100):
+    backend.configure(task_spec)
+    dataset = []
+
+    for episode_id in range(num_episodes):
+        obs, state, info = backend.reset(seed=episode_id)
+        trajectory = {"observations": [obs], "actions": [], "rewards": []}
+
+        done = False
+        while not done:
+            action = expert_policy(obs)
+            obs, reward, terminated, truncated, state, info = backend.step(action)
+
+            trajectory["observations"].append(obs)
+            trajectory["actions"].append(action)
+            trajectory["rewards"].append(reward)
+            done = terminated or truncated
+
+        dataset.append(trajectory)
+
+    return dataset
+```
+
+## Performance Tips
+
+### 1. Reuse Parser and Backend
+```python
+# GOOD: Reuse instances
+parser = TaskParser()
+backend = MiniGridBackend()
+
+for task_file in task_files:
+    spec = TaskSpecification.from_json(task_file)
+    backend.configure(spec)
+    # ... evaluate
+
+# AVOID: Creating new instances each time
+for task_file in task_files:
+    parser = TaskParser()  # Wasteful!
+    backend = MiniGridBackend()  # Wasteful!
+    # ...
+```
+
+### 2. Choose Appropriate Render Mode
+```python
+# For headless evaluation
+backend = MiniGridBackend(render_mode="rgb_array")
+
+# For interactive debugging
+backend = MiniGridBackend(render_mode="human")
+
+# For fastest execution (no visuals needed)
+backend = MiniGridBackend(render_mode=None)
+```
+
+### 3. Close Environments
+```python
+# Always close when done
+try:
+    backend.reset()
+    # ... run episodes
+finally:
+    backend.close()  # Cleanup resources
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **RuntimeError: Backend must be configured before reset**
+   - Solution: Call `backend.configure(spec)` before `backend.reset()`
+
+2. **Objects not appearing in environment**
+   - Check task JSON has mechanisms defined
+   - Validate spec: `spec.validate()`
+
+3. **Switch references non-existent gate**
+   - Ensure gate IDs in task spec match switch.controls
+
+4. **Agent spawns in wrong position**
+   - Check for position conflicts in task spec
+   - Parser places agent last to handle conflicts
+
+5. **Unexpected reward values**
+   - Check if agent stepped on hazard (reward=0, terminated=True)
+   - vs reaching goal (reward>0, terminated=True)
+
+See individual documentation files for detailed troubleshooting guides.
+
+## API Quick Reference
+
+### TaskParser
+- `TaskParser(render_mode=None)`: Create parser
+- `.parse(spec, seed=None)`: Parse TaskSpecification → environment
+- `.parse_file(path)`: Load and parse JSON file
+- `.parse_dict(data)`: Parse dictionary
+
+### Backend Interface (MiniGrid and MultiGrid)
+- `.__init__(...)`: Initialize backend
+- `.configure(task_spec)`: Set task to use
+- `.reset(seed=None)`: Reset to initial state
+- `.step(action)`: Execute action
+- `.render()`: Get RGB image
+- `.get_mission_text()`: Get goal description
+- `.get_state()`: Get GridState
+- `.close()`: Cleanup
+
+### TaskSpecification
+- `.from_json(path)`: Load from file
+- `.from_dict(data)`: Load from dictionary
+- `.validate()`: Check consistency
+- `.to_json(path)`: Save to file
+- `.get_mission_text()`: Generate description
+
+## File Locations
+
+```
+src/v1_1/
+├── gridworld/
+│   ├── task_spec.py              # TaskSpecification schema
+│   ├── task_parser.py            # Parser implementation
+│   ├── custom_env.py             # CustomMiniGridEnv
+│   └── backends/
+│       ├── base.py               # AbstractGridBackend interface
+│       ├── minigrid_backend.py   # MiniGrid implementation
+│       └── multigrid_backend.py  # MultiGrid implementation
+│
+├── multigrid/                    # Custom MultiGrid environment
+│   └── env.py
+│
+└── docs/                         # This directory
+    ├── README.md                 # This file
+    ├── task_parser.md            # Task Parser docs
+    ├── minigrid_backend.md       # MiniGrid Backend docs
+    └── multigrid_backend.md      # MultiGrid Backend docs
+```
+
+## Related Resources
+
+### Code Files
+- `gridworld/task_spec.py`: Complete TaskSpecification schema with validation
+- `gridworld/custom_env.py`: Custom MiniGrid environment with all mechanisms
+- `gridworld/backends/base.py`: Backend interface and GridState definition
+
+### Example Tasks
+- `tasks/tier1/`: Navigation tasks
+- `tasks/tier2/`: Key-door puzzles
+- `tasks/tier3/`: Switch-gate mechanisms
+- `tasks/tier4/`: Irreversible actions
+- `tasks/tier5/`: Hidden information
+
+### Evaluation Scripts
+- `scripts/eval_minigrid.py`: Evaluation runner
+- `scripts/generate_tasks.py`: Task generation utilities
+
+## Contributing
+
+When adding new features to the framework:
+
+1. **Update inline documentation**: Add comprehensive docstrings and comments
+2. **Update markdown docs**: Reflect changes in relevant .md files
+3. **Add examples**: Include usage examples in documentation
+4. **Update comparison tables**: Keep feature matrices current
+5. **Note limitations**: Document known issues and workarounds
+
+## Version History
+
+- **v1.1**: Current version
+  - MiniGrid Backend: Production-ready
+  - MultiGrid Backend: Experimental
+  - Full mechanism support in MiniGrid
+  - Comprehensive documentation
+
+- **v1.0**: Initial release
+  - Basic task specification
+  - MiniGrid backend only
+  - Limited documentation
+
+## Contact and Support
+
+For issues, questions, or contributions:
+- See main MultiNet repository README
+- Check individual documentation files for detailed troubleshooting
+- Review inline code comments for implementation details
+
+---
+
+**Last Updated**: 2026-01-30
+
+**Documentation Status**: Complete and ready for production use
diff --git a/src/v1_1/docs/minigrid_backend.md b/src/v1_1/docs/minigrid_backend.md
new file mode 100644
index 00000000..ea2b3669
--- /dev/null
+++ b/src/v1_1/docs/minigrid_backend.md
@@ -0,0 +1,793 @@
+# MiniGrid Backend Documentation
+
+## Overview
+
+The MiniGrid Backend is a production-ready implementation of the `AbstractGridBackend` interface that wraps the gymnasium MiniGrid package. It provides a stable, well-tested foundation for evaluating agents on gridworld navigation and puzzle-solving tasks.
+
+**Purpose**: Enable evaluation of vision-language-action models on standard square-grid environments with comprehensive mechanism support (keys, doors, switches, gates, blocks, hazards).
+
+**Location**: `/src/v1_1/gridworld/backends/minigrid_backend.py`
+
+**Status**: MVP (Minimum Viable Product) - Production ready
+
+---
+
+## Architecture
+
+### Backend Abstraction Layer
+
+The MiniGrid Backend implements the `AbstractGridBackend` interface, which defines a standard API that all grid environment backends must support. This abstraction allows:
+
+- **Backend Swapping**: Switch between MiniGrid and MultiGrid (or future backends) without changing evaluation code
+- **Consistent API**: Same methods and return types across all backends
+- **Backend-Agnostic State**: GridState representation works with any backend
+
+```
+┌───────────────────────────────────────────────────────────┐
+│              Backend Abstraction Architecture              │
+└───────────────────────────────────────────────────────────┘
+
+    TaskSpecification (JSON)
+            │
+            ▼
+    ┌──────────────────┐
+    │AbstractGridBackend│ ◄─── Common interface
+    └────────┬──────────┘
+         ┌───┴────┐
+         ▼        ▼
+    ┌─────────┐ ┌──────────────┐
+    │MiniGrid │ │  MultiGrid   │
+    │Backend  │ │  Backend     │
+    │(This)   │ │(Exotic tiles)│
+    └────┬────┘ └──────────────┘
+         │
+         ├──► TaskParser (creates env from spec)
+         │
+         ├──► CustomMiniGridEnv (gymnasium-based)
+         │
+         └──► GridState (backend-agnostic state)
+```
+
+### Component Interaction
+
+```
+┌─────────────────────────────────────────────────────────┐
+│              MiniGrid Backend Workflow                   │
+└─────────────────────────────────────────────────────────┘
+
+1. CONFIGURATION
+   backend.configure(task_spec)
+       │
+       └──► Store task_spec for later use
+            Set _configured = True
+
+2. RESET
+   backend.reset(seed=42)
+       │
+       ├──► parser.parse(task_spec, seed)
+       │      │
+       │      ├──► Create CustomMiniGridEnv
+       │      ├──► env.reset() [initializes grid]
+       │      └──► Populate grid with objects
+       │
+       ├──► env.gen_obs() [symbolic observation]
+       ├──► env.render() [RGB image]
+       ├──► _get_grid_state() [extract state]
+       │
+       └──► Return (rgb_obs, state, info)
+
+3. STEP
+   backend.step(action)
+       │
+       ├──► env.step(action) [execute in MiniGrid]
+       ├──► env.render() [get new RGB obs]
+       ├──► _get_grid_state() [extract new state]
+       │
+       └──► Return (obs, reward, terminated, truncated, state, info)
+
+4. RENDER
+   backend.render()
+       │
+       └──► env.render() [RGB image of current state]
+```
+
+---
+
+## Key Components
+
+### MiniGridBackend Class
+
+```python
+class MiniGridBackend(AbstractGridBackend):
+    """
+    Backend implementation using gymnasium's MiniGrid package.
+    """
+
+    def __init__(self, render_mode: Optional[str] = "rgb_array")
+    def configure(self, task_spec: TaskSpecification) -> None
+    def reset(self, seed: Optional[int] = None) -> tuple[np.ndarray, GridState, dict]
+    def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, GridState, dict]
+    def render(self) -> np.ndarray
+    def get_mission_text(self) -> str
+    def get_state(self) -> GridState
+    def close(self) -> None
+```
+
+### Constructor: `__init__(render_mode)`
+
+**Parameters**:
+- `render_mode` (str, optional): Rendering mode for the environment
+  - `"rgb_array"`: Returns RGB numpy arrays (recommended for evaluation)
+  - `"human"`: Opens a window for visualization (for debugging)
+  - `None`: Minimal rendering (fastest)
+
+**Default**: `"rgb_array"`
+
+**Example**:
+```python
+from gridworld.backends import MiniGridBackend
+
+# Production evaluation setup
+backend = MiniGridBackend(render_mode="rgb_array")
+
+# Interactive debugging
+backend = MiniGridBackend(render_mode="human")
+```
+
+**Initialization Details**:
+- Creates a `TaskParser` instance with the specified render mode
+- Initializes `self.env` to None (environment created on reset)
+- Sets up observation caching (`_last_obs`)
+
+### Method: `configure(task_spec)`
+
+Configures the backend with a task specification. This is the first method that must be called.
+
+**Parameters**:
+- `task_spec` (TaskSpecification): The task definition to use
+
+**Returns**: None
+
+**Side Effects**:
+- Stores `task_spec` for use in `reset()`
+- Sets `_configured` flag to True
+
+**Example**:
+```python
+from gridworld.task_spec import TaskSpecification
+from gridworld.backends import MiniGridBackend
+
+# Load task specification
+spec = TaskSpecification.from_json("task.json")
+
+# Configure backend
+backend = MiniGridBackend()
+backend.configure(spec)
+
+# Now ready for reset()
+```
+
+**Design Note**: Configuration is separate from reset to allow:
+1. Pre-validation of task specs before environment creation
+2. Reusing the same backend with different tasks
+3. Lazy environment creation (only on reset)
+
+### Method: `reset(seed=None)`
+
+Resets the environment to its initial state and returns the starting observation.
+
+**Parameters**:
+- `seed` (int, optional): Random seed for reproducibility. If None, uses `task_spec.seed`
+
+**Returns**:
+- `observation` (np.ndarray): RGB image of initial state, shape (H, W, 3)
+- `state` (GridState): Backend-agnostic state representation
+- `info` (dict): Additional information (currently empty)
+
+**Raises**:
+- `RuntimeError`: If `configure()` has not been called
+
+**Example**:
+```python
+# Reset with task's default seed
+obs, state, info = backend.reset()
+
+# Reset with specific seed for evaluation
+obs, state, info = backend.reset(seed=42)
+
+print(f"Observation shape: {obs.shape}")
+print(f"Agent at: {state.agent_position}")
+print(f"Agent facing: {state.agent_direction}")
+```
+
+**Critical Implementation Detail - Why We Don't Call env.reset() Here**:
+
+The `reset()` method uses `parser.parse()` to create a fresh environment. The parser internally calls `env.reset()` to initialize the grid, then populates it with objects. **We must NOT call `env.reset()` again** in the backend's `reset()` method because:
+
+1. It would wipe out all placed objects (keys, doors, switches, etc.)
+2. The grid would be empty except for border walls
+3. The task would be unplayable
+
+This is a deliberate architectural choice:
+- **TaskParser responsibility**: Create + reset + populate
+- **Backend responsibility**: Trigger parser + extract observations
+
+### Method: `step(action)`
+
+Executes one action in the environment and returns the result.
+
+**Parameters**:
+- `action` (int): Action to execute (0-6)
+  - 0: Turn left
+  - 1: Turn right
+  - 2: Move forward
+  - 3: Pickup object
+  - 4: Drop object
+  - 5: Toggle/interact
+  - 6: Done/wait
+
+**Returns**:
+- `observation` (np.ndarray): RGB image of new state
+- `reward` (float): Reward for this step
+- `terminated` (bool): True if episode ended (goal reached or failure)
+- `truncated` (bool): True if episode cut short (max steps reached)
+- `state` (GridState): New backend-agnostic state
+- `info` (dict): Additional information from environment
+
+**Raises**:
+- `RuntimeError`: If `reset()` has not been called
+
+**Example**:
+```python
+# Execute forward action
+obs, reward, terminated, truncated, state, info = backend.step(2)
+
+if terminated:
+    if reward > 0:
+        print("Goal reached!")
+    else:
+        print("Episode failed (e.g., stepped on lava)")
+
+if truncated:
+    print("Max steps reached without solving")
+
+# Check if agent is carrying something
+if state.agent_carrying:
+    print(f"Agent holding: {state.agent_carrying}")
+
+# Check mechanism states
+print(f"Active switches: {state.active_switches}")
+print(f"Open gates: {state.open_gates}")
+```
+
+**Reward Structure**:
+
+MiniGrid uses a time-penalized reward:
+```python
+reward = 1.0 - 0.9 * (step_count / max_steps)
+```
+
+- **Goal reached immediately**: reward = 1.0
+- **Goal reached at 50% steps**: reward = 0.55
+- **Goal reached at max steps**: reward = 0.1
+- **Failed or truncated**: reward = 0
+
+This encourages efficient solutions.
+
+### Method: `render()`
+
+Returns an RGB rendering of the current environment state.
+
+**Returns**:
+- `np.ndarray`: RGB image, shape (H, W, 3), dtype uint8
+
+**Example**:
+```python
+import matplotlib.pyplot as plt
+
+# Get current rendering
+rgb_image = backend.render()
+
+# Display
+plt.imshow(rgb_image)
+plt.title("Current Environment State")
+plt.axis('off')
+plt.show()
+```
+
+**Behavior**:
+- If `render_mode="rgb_array"`, calls `env.render()`
+- If other render mode, returns cached `_last_obs`
+- If no observations yet, returns black placeholder
+
+### Method: `get_mission_text()`
+
+Returns the mission/goal description for the current task.
+
+**Returns**:
+- `str`: Human-readable mission description
+
+**Example**:
+```python
+mission = backend.get_mission_text()
+print(mission)
+# Output: "Navigate to the goal. Keys: 2. Locked doors: 2."
+```
+
+**Text Sources** (in order of priority):
+1. Environment's mission text (if environment exists)
+2. Task spec's mission text (if task configured)
+3. Default text: "Navigate to the goal"
+
+### Method: `get_state()`
+
+Returns the current environment state as a GridState object.
+
+**Returns**:
+- `GridState`: Backend-agnostic state representation
+
+**Example**:
+```python
+state = backend.get_state()
+print(f"Position: {state.agent_position}")
+print(f"Direction: {state.agent_direction}")
+print(f"Steps: {state.step_count}/{state.max_steps}")
+print(f"Goal reached: {state.goal_reached}")
+```
+
+### Method: `close()`
+
+Cleans up resources and closes the environment.
+
+**Example**:
+```python
+# Done with environment
+backend.close()
+```
+
+**Best Practice**:
+```python
+try:
+    backend.reset()
+    # ... run episode ...
+finally:
+    backend.close()  # Ensure cleanup
+```
+
+---
+
+## GridState Extraction
+
+### The `_get_grid_state()` Method
+
+This internal method converts the MiniGrid environment state into a backend-agnostic `GridState` object. This is crucial for evaluation and backend comparison.
+
+**What It Extracts**:
+
+1. **Agent State**:
+   - Position: `(x, y)` tuple
+   - Direction: Integer 0-3 (right, down, left, up)
+   - Carrying: Color of held object or None
+
+2. **Mechanism States**:
+   - Active switches: Set of switch IDs currently toggled on
+   - Open gates: Set of gate IDs currently passable
+   - Block positions: Dict mapping block_id → (x, y)
+
+3. **Episode State**:
+   - Step count: Number of steps taken
+   - Max steps: Episode step limit
+   - Goal reached: Boolean flag
+
+**Performance Consideration**:
+
+Block position extraction requires a full grid scan (O(width × height) per block). For a typical 8×8 grid with 3 blocks, this is ~192 cell checks per step. Acceptable for evaluation but could be optimized with position caching for larger grids or real-time applications.
+
+**Example Output**:
+```python
+state = backend.get_state()
+# GridState(
+#     agent_position=(4, 5),
+#     agent_direction=2,  # Facing left
+#     agent_carrying="red",  # Holding red key
+#     step_count=15,
+#     max_steps=100,
+#     open_doors=set(),
+#     collected_keys=set(),
+#     active_switches={'sw1'},  # Switch sw1 is active
+#     open_gates={'gate1'},  # Gate gate1 is open
+#     block_positions={'block1': (3, 3), 'block2': (5, 6)},
+#     goal_reached=False
+# )
+```
+
+---
+
+## Usage Examples
+
+### Example 1: Basic Episode Execution
+
+```python
+from gridworld.backends import MiniGridBackend
+from gridworld.task_spec import TaskSpecification
+
+# Load task
+spec = TaskSpecification.from_json("tasks/navigation_8x8.json")
+
+# Create and configure backend
+backend = MiniGridBackend(render_mode="rgb_array")
+backend.configure(spec)
+
+# Run episode
+obs, state, info = backend.reset(seed=42)
+done = False
+total_reward = 0
+step_count = 0
+
+while not done:
+    # Random policy (replace with your agent)
+    action = np.random.randint(0, 7)
+
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+    total_reward += reward
+    step_count += 1
+    done = terminated or truncated
+
+    print(f"Step {step_count}: pos={state.agent_position}, "
+          f"reward={reward:.3f}, done={done}")
+
+print(f"\nEpisode finished:")
+print(f"  Total reward: {total_reward:.3f}")
+print(f"  Steps taken: {step_count}")
+print(f"  Success: {state.goal_reached}")
+
+backend.close()
+```
+
+### Example 2: Multi-Seed Evaluation
+
+```python
+from gridworld.backends import MiniGridBackend
+from gridworld.task_spec import TaskSpecification
+
+def evaluate_policy(policy_fn, task_path, num_seeds=10):
+    """
+    Evaluate a policy across multiple seeds.
+    """
+    spec = TaskSpecification.from_json(task_path)
+    backend = MiniGridBackend(render_mode="rgb_array")
+    backend.configure(spec)
+
+    results = []
+    for seed in range(num_seeds):
+        obs, state, info = backend.reset(seed=seed)
+        done = False
+        total_reward = 0
+        steps = 0
+
+        while not done:
+            action = policy_fn(obs, state)
+            obs, reward, terminated, truncated, state, info = backend.step(action)
+            total_reward += reward
+            steps += 1
+            done = terminated or truncated
+
+        results.append({
+            "seed": seed,
+            "success": state.goal_reached,
+            "reward": total_reward,
+            "steps": steps
+        })
+
+    backend.close()
+
+    # Aggregate results
+    success_rate = sum(r["success"] for r in results) / len(results)
+    avg_reward = sum(r["reward"] for r in results) / len(results)
+    avg_steps = sum(r["steps"] for r in results) / len(results)
+
+    return {
+        "success_rate": success_rate,
+        "avg_reward": avg_reward,
+        "avg_steps": avg_steps,
+        "per_seed": results
+    }
+
+# Example usage
+def random_policy(obs, state):
+    return np.random.randint(0, 7)
+
+results = evaluate_policy(random_policy, "task.json", num_seeds=10)
+print(f"Success rate: {results['success_rate']:.1%}")
+```
+
+### Example 3: Observation and State Comparison
+
+```python
+from gridworld.backends import MiniGridBackend
+from gridworld.task_spec import TaskSpecification
+
+# Setup
+spec = TaskSpecification.from_json("task.json")
+backend = MiniGridBackend(render_mode="rgb_array")
+backend.configure(spec)
+
+# Reset
+obs, state, info = backend.reset(seed=42)
+
+print("Initial State:")
+print(f"  RGB observation shape: {obs.shape}")
+print(f"  Agent position: {state.agent_position}")
+print(f"  Agent direction: {state.agent_direction}")
+print(f"  Mission: {backend.get_mission_text()}")
+
+# Take a few actions
+for action in [2, 2, 5]:  # Forward, forward, toggle
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+    print(f"\nAfter action {action}:")
+    print(f"  Position: {state.agent_position}")
+    print(f"  Carrying: {state.agent_carrying}")
+    print(f"  Active switches: {state.active_switches}")
+    print(f"  Reward: {reward}")
+
+backend.close()
+```
+
+### Example 4: Mechanism State Tracking
+
+```python
+from gridworld.backends import MiniGridBackend
+from gridworld.task_spec import TaskSpecification
+
+# Task with switches and gates
+spec = TaskSpecification.from_json("tasks/switch_gate_puzzle.json")
+backend = MiniGridBackend()
+backend.configure(spec)
+
+obs, state, info = backend.reset()
+
+print("Initial mechanism states:")
+print(f"  Active switches: {state.active_switches}")
+print(f"  Open gates: {state.open_gates}")
+
+# Agent navigates and toggles a switch
+# ... execute actions ...
+
+# After toggling switch
+state = backend.get_state()
+print("\nAfter toggling switch:")
+print(f"  Active switches: {state.active_switches}")
+print(f"  Open gates: {state.open_gates}")
+
+# Check if gate is now passable
+if 'gate1' in state.open_gates:
+    print("Gate 1 is now open and passable!")
+```
+
+### Example 5: Video Recording
+
+```python
+from gridworld.backends import MiniGridBackend
+from gridworld.task_spec import TaskSpecification
+import imageio
+
+# Setup
+spec = TaskSpecification.from_json("task.json")
+backend = MiniGridBackend(render_mode="rgb_array")
+backend.configure(spec)
+
+# Record episode
+frames = []
+obs, state, info = backend.reset(seed=42)
+frames.append(backend.render())
+
+done = False
+while not done:
+    action = my_policy(obs)
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+    frames.append(backend.render())
+    done = terminated or truncated
+
+backend.close()
+
+# Save video
+imageio.mimsave("episode.mp4", frames, fps=4)
+print(f"Saved {len(frames)} frames to episode.mp4")
+```
+
+---
+
+## Feature Support
+
+### Supported Mechanisms
+
+| Mechanism | Supported | Notes |
+|-----------|-----------|-------|
+| Walls | ✓ | Static barriers |
+| Keys | ✓ | Collectible items, multiple colors |
+| Doors | ✓ | Locked/unlocked, require matching key color |
+| Switches | ✓ | Toggle, hold, and one-shot types |
+| Gates | ✓ | Controlled by switches |
+| Blocks | ✓ | Pushable Sokoban-style |
+| Hazards | ✓ | Lava (episode-ending) |
+| Teleporters | ✗ | Not implemented in MiniGrid |
+| Partial Observability | ✓ | Agent has limited field of view |
+
+### Supported Goal Types
+
+| Goal Type | Supported | Description |
+|-----------|-----------|-------------|
+| Reach Position | ✓ | Navigate to goal position |
+| Collect All | Partial | Can collect keys, but goal checking not fully implemented |
+| Push Block To | Partial | Blocks are pushable, but goal checking not fully implemented |
+| Survive Steps | ✓ | Don't die until max steps |
+
+**Note**: For full multi-goal support, use the goal specification and implement custom win condition checking in your evaluation code.
+
+### Rendering Modes
+
+| Mode | Description | Use Case |
+|------|-------------|----------|
+| `rgb_array` | Returns RGB numpy arrays | Headless evaluation, ML training |
+| `human` | Opens visualization window | Interactive debugging |
+| `None` | Minimal rendering | Fastest for non-visual evaluation |
+
+**Recommendation**: Use `"rgb_array"` for all evaluation to ensure consistent observations.
+
+---
+
+## Performance Characteristics
+
+### Timing Benchmarks (8×8 grid, typical task)
+
+| Operation | Time | Notes |
+|-----------|------|-------|
+| configure() | ~0.1 ms | Just stores task spec |
+| reset() | ~8-12 ms | Parser + grid population |
+| step() | ~2-4 ms | Action execution + state extraction |
+| render() | ~3-5 ms | RGB image generation |
+| get_state() | ~1-2 ms | GridState extraction |
+
+**Total episode (100 steps)**: ~400-600 ms
+
+### Memory Usage
+
+- **Backend instance**: ~1 KB (just metadata)
+- **Environment instance**: ~50-100 KB (grid, objects, render buffer)
+- **RGB observation**: ~150 KB for 64×64×3 uint8 image
+
+**Recommendation**: For large-scale evaluation (1000s of episodes), create environments on-demand and close them when done to avoid memory accumulation.
+
+---
+
+## Integration with Evaluation Pipeline
+
+### Standard Evaluation Pattern
+
+```python
+from gridworld.backends import MiniGridBackend
+from gridworld.task_spec import TaskSpecification
+
+def run_evaluation(agent, task_files, num_seeds=5):
+    """
+    Standard evaluation loop using MiniGrid backend.
+    """
+    backend = MiniGridBackend(render_mode="rgb_array")
+    results = {}
+
+    for task_file in task_files:
+        spec = TaskSpecification.from_json(task_file)
+        backend.configure(spec)
+
+        task_results = []
+        for seed in range(num_seeds):
+            obs, state, info = backend.reset(seed=seed)
+
+            episode_data = {
+                "observations": [obs],
+                "states": [state.to_dict()],
+                "actions": [],
+                "rewards": []
+            }
+
+            done = False
+            while not done:
+                action = agent.predict(obs)
+                obs, reward, terminated, truncated, state, info = backend.step(action)
+
+                episode_data["observations"].append(obs)
+                episode_data["states"].append(state.to_dict())
+                episode_data["actions"].append(action)
+                episode_data["rewards"].append(reward)
+
+                done = terminated or truncated
+
+            episode_data["success"] = state.goal_reached
+            episode_data["total_reward"] = sum(episode_data["rewards"])
+            task_results.append(episode_data)
+
+        results[spec.task_id] = task_results
+
+    backend.close()
+    return results
+```
+
+---
+
+## Troubleshooting
+
+### Issue 1: RuntimeError on reset()
+
+**Error**: `RuntimeError: Backend must be configured before reset`
+
+**Cause**: Called `reset()` before `configure()`
+
+**Solution**:
+```python
+# WRONG
+backend = MiniGridBackend()
+backend.reset()  # Error!
+
+# CORRECT
+backend = MiniGridBackend()
+backend.configure(task_spec)
+backend.reset()  # Works
+```
+
+### Issue 2: Objects Not Appearing
+
+**Symptom**: Environment is empty except for walls
+
+**Cause**: Task specification has no mechanisms, or parser error
+
+**Solution**:
+1. Check task JSON has mechanisms defined
+2. Validate task spec: `spec.validate()`
+3. Check parser logs for errors
+
+### Issue 3: Unexpected Reward Values
+
+**Symptom**: Reward is 0 even though goal reached
+
+**Cause**: Stepped on hazard before reaching goal
+
+**Solution**: Check `state.terminated` to distinguish:
+- `terminated=True, reward>0`: Goal reached
+- `terminated=True, reward=0`: Failed (hazard, etc.)
+- `truncated=True, reward=0`: Max steps reached
+
+### Issue 4: GridState Has Wrong Block Positions
+
+**Symptom**: `state.block_positions` is incorrect
+
+**Cause**: Blocks were pushed but state not updated
+
+**Solution**: This is a known limitation. GridState extraction scans the grid, so it should be accurate. If you're seeing errors, check:
+1. Are you using a cached state instead of calling `get_state()` after each step?
+2. Are multiple blocks at the same position (invalid task)?
+
+---
+
+## Comparison with MultiGrid Backend
+
+| Feature | MiniGridBackend | MultiGridBackend |
+|---------|-----------------|------------------|
+| **Tilings** | Square only | Square, hex, triangle |
+| **Maturity** | Production-ready | Experimental |
+| **Performance** | Fast (~400ms/episode) | Slower (~600ms/episode) |
+| **Switches/Gates** | Fully supported | Not yet implemented |
+| **Partial Observability** | Supported | Not yet implemented |
+| **Render Quality** | High (MiniGrid native) | Variable |
+| **Use Case** | Standard evaluation | Research on exotic tilings |
+
+**Recommendation**: Use MiniGridBackend for production evaluation. Use MultiGridBackend only for research requiring non-square tilings.
+
+---
+
+## See Also
+
+- [AbstractGridBackend Interface](../gridworld/backends/base.py): Base interface documentation
+- [Task Parser Documentation](./task_parser.md): How tasks are parsed into environments
+- [MultiGrid Backend Documentation](./multigrid_backend.md): Alternative backend for exotic tilings
+- [TaskSpecification Schema](../gridworld/task_spec.py): JSON format for tasks
+- [Evaluation Pipeline Guide](../../docs/evaluation.md): End-to-end evaluation setup
diff --git a/src/v1_1/docs/multigrid_backend.md b/src/v1_1/docs/multigrid_backend.md
new file mode 100644
index 00000000..ca233ec6
--- /dev/null
+++ b/src/v1_1/docs/multigrid_backend.md
@@ -0,0 +1,1085 @@
+# MultiGrid Backend Documentation
+
+## Overview
+
+The MultiGrid Backend is an experimental implementation of the `AbstractGridBackend` interface that supports exotic grid tilings (hexagonal and triangular) in addition to standard square grids. It bridges the standard MiniGrid task specification format with a custom MultiGrid environment system designed for research on non-traditional spatial representations.
+
+**Purpose**: Enable research and evaluation on exotic grid tilings while maintaining compatibility with the standard backend interface and task specification format.
+
+**Location**: `/src/v1_1/gridworld/backends/multigrid_backend.py`
+
+**Status**: Experimental - Research use only
+
+**Target Audience**: Researchers investigating how agents generalize across different spatial topologies.
+
+---
+
+## Architecture
+
+### Exotic Tiling Support
+
+The key differentiator of MultiGrid Backend is its support for three tiling types:
+
+1. **Square Tiling** (Standard): 4-connected grid with 90° rotations
+2. **Hexagonal Tiling**: 6-connected grid with 60° rotations
+3. **Triangular Tiling**: Variable connectivity with complex navigation
+
+```
+┌───────────────────────────────────────────────────────────┐
+│                   Tiling Types                             │
+└───────────────────────────────────────────────────────────┘
+
+SQUARE (4-connected)        HEXAGONAL (6-connected)
+┌───┬───┬───┬───┐              ⬡   ⬡   ⬡   ⬡
+│   │   │   │   │             ⬡   ⬡   ⬡   ⬡
+├───┼───┼───┼───┤              ⬡   ⬡   ⬡   ⬡
+│   │ A │   │   │             ⬡   A   ⬡   ⬡
+├───┼───┼───┼───┤              ⬡   ⬡   ⬡   ⬡
+│   │   │   │   │             ⬡   ⬡   ⬡   ⬡
+└───┴───┴───┴───┘
+
+Neighbors: 4 (N/S/E/W)      Neighbors: 6 (all adjacent)
+
+TRIANGULAR (variable)
+    △ ▽ △ ▽
+    ▽ △ ▽ △
+    △ A △ ▽
+    ▽ △ ▽ △
+
+Neighbors: 3 or 9 depending on orientation
+```
+
+### Component Interaction
+
+```
+┌─────────────────────────────────────────────────────────┐
+│           MultiGrid Backend Architecture                 │
+└─────────────────────────────────────────────────────────┘
+
+TaskSpecification (MiniGrid format)
+         │
+         ▼
+┌────────────────────────┐
+│ MultiGridBackend       │
+│  ._convert_task_spec() │
+└───────┬────────────────┘
+        │
+        ├──► Convert coordinates: integer → normalized [0,1]
+        ├──► Convert objects: keys/doors/blocks → unified format
+        ├──► Add tiling specification
+        │
+        ▼
+MultiGrid Task Spec (dict)
+        │
+        ▼
+┌────────────────────────┐
+│  MultiGridEnv          │
+│  (custom environment)  │
+└───────┬────────────────┘
+        │
+        ├──► Tiling: square/hex/triangle
+        ├──► Scene: agent + objects + walls
+        ├──► Goal: reach/collect/push
+        │
+        ▼
+   GridState (backend-agnostic)
+```
+
+### Coordinate System Translation
+
+A major architectural challenge is coordinate system conversion:
+
+**MiniGrid Format** (Integer Grid):
+- Position: `(x=3, y=5)` in an 8×8 grid
+- Semantics: Absolute grid cell coordinates
+- Range: `[0, width)` × `[0, height)`
+
+**MultiGrid Format** (Normalized Continuous):
+- Position: `{"x": 0.375, "y": 0.625}`
+- Semantics: Normalized position in [0, 1] × [0, 1]
+- Calculation: `x_norm = x / width`, `y_norm = y / height`
+
+**Rationale**: Normalized coordinates allow the same task to be rendered on different tilings. A task defined on a square grid can be "ported" to hexagonal by reinterpreting the normalized positions.
+
+---
+
+## Key Components
+
+### MultiGridBackend Class
+
+```python
+class MultiGridBackend(AbstractGridBackend):
+    """
+    Backend adapter for the custom MultiGrid system.
+    Supports exotic tilings: square, hex, triangle.
+    """
+
+    def __init__(self, tiling="square", render_mode="rgb_array",
+                 render_width=640, render_height=640)
+    def configure(self, task_spec: TaskSpecification) -> None
+    def reset(self, seed: Optional[int] = None) -> tuple[np.ndarray, GridState, dict]
+    def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, GridState, dict]
+    def render(self) -> np.ndarray
+    def get_mission_text(self) -> str
+    def get_state(self) -> GridState
+    def close(self) -> None
+
+    # Internal methods
+    def _convert_task_spec(self, spec: TaskSpecification) -> dict
+    def _build_grid_state(self) -> GridState
+```
+
+### Constructor: `__init__(tiling, render_mode, render_width, render_height)`
+
+**Parameters**:
+- `tiling` (str): Tiling type
+  - `"square"`: Standard 4-connected grid (default)
+  - `"hex"`: Hexagonal 6-connected grid
+  - `"triangle"`: Triangular variable-connected grid
+- `render_mode` (str): Rendering mode
+  - `"rgb_array"`: Returns RGB numpy arrays (recommended)
+  - `"human"`: Opens visualization window
+- `render_width` (int): Width of rendered images in pixels (default 640)
+- `render_height` (int): Height of rendered images in pixels (default 640)
+
+**Example**:
+```python
+from gridworld.backends import MultiGridBackend
+
+# Standard square tiling (same as MiniGrid)
+backend = MultiGridBackend(tiling="square")
+
+# Hexagonal tiling for research
+backend = MultiGridBackend(tiling="hex", render_mode="rgb_array")
+
+# Triangle tiling with custom render size
+backend = MultiGridBackend(tiling="triangle",
+                           render_width=800,
+                           render_height=800)
+```
+
+**Initialization Details**:
+- Stores tiling type and rendering parameters
+- Does NOT create environment (lazy initialization on configure)
+- Initializes step tracking (`_step_count`, `_max_steps`)
+
+### Method: `configure(task_spec)`
+
+Configures the backend with a task specification and creates the MultiGrid environment.
+
+**Parameters**:
+- `task_spec` (TaskSpecification): Task to configure
+
+**Returns**: None
+
+**Side Effects**:
+- Converts task spec to MultiGrid format
+- Creates `MultiGridEnv` instance
+- Sets `_configured` flag
+
+**Example**:
+```python
+from gridworld.task_spec import TaskSpecification
+from gridworld.backends import MultiGridBackend
+
+# Load standard MiniGrid task
+spec = TaskSpecification.from_json("task.json")
+
+# Configure with hexagonal tiling
+backend = MultiGridBackend(tiling="hex")
+backend.configure(spec)
+
+# The same task is now running on a hex grid!
+```
+
+**Conversion Process**:
+
+The `_convert_task_spec()` method transforms MiniGrid format → MultiGrid format:
+
+1. **Coordinates**: Integer grid positions → Normalized [0,1] positions
+2. **Objects**: Separate mechanism types → Unified objects list
+3. **Tiling**: Implicit square → Explicit tiling specification
+4. **Goal**: Standard format → MultiGrid goal spec
+
+See "Task Specification Conversion" section for details.
+
+### Method: `reset(seed=None)`
+
+Resets the environment to initial state.
+
+**Parameters**:
+- `seed` (int, optional): Random seed for reproducibility
+
+**Returns**:
+- `observation` (np.ndarray): RGB image of initial state
+- `state` (GridState): Backend-agnostic state
+- `info` (dict): Additional information
+
+**Raises**:
+- `RuntimeError`: If not configured
+
+**Example**:
+```python
+obs, state, info = backend.reset(seed=42)
+print(f"Observation shape: {obs.shape}")  # (640, 640, 3)
+print(f"Agent position: {state.agent_position}")
+```
+
+**Note**: Unlike MiniGridBackend, MultiGridBackend does NOT use TaskParser. It directly creates a MultiGridEnv from the converted task spec.
+
+### Method: `step(action)`
+
+Executes one action with automatic action space translation.
+
+**Parameters**:
+- `action` (int): MiniGrid action (0-6)
+
+**Returns**:
+- `observation`, `reward`, `terminated`, `truncated`, `state`, `info`
+
+**Action Translation**:
+
+MultiGrid uses a different action enumeration than MiniGrid. The backend automatically translates:
+
+| MiniGrid Action | MultiGrid Action | Description |
+|-----------------|------------------|-------------|
+| 0: turn_left | 2: TURN_LEFT | Rotate counterclockwise |
+| 1: turn_right | 3: TURN_RIGHT | Rotate clockwise |
+| 2: forward | 0: FORWARD | Move in facing direction |
+| 3: pickup | 4: PICKUP | Pick up object in front |
+| 4: drop | 5: DROP | Drop held object |
+| 5: toggle | 6: PUSH | Interact with object |
+| 6: done | 7: WAIT | No-op action |
+
+**Example**:
+```python
+# Use standard MiniGrid action indices
+obs, reward, terminated, truncated, state, info = backend.step(2)  # forward
+
+# Translation happens automatically
+# Agent can use same policy on MiniGrid or MultiGrid
+```
+
+**Design Rationale**: Action translation enables:
+- **Policy Reuse**: Same agent works on both backends
+- **Backend Comparison**: Evaluate same policy on square vs hex grids
+- **Simplified Evaluation**: Caller doesn't need backend-specific knowledge
+
+### Method: `_convert_task_spec(spec)`
+
+Internal method that converts MiniGrid TaskSpecification to MultiGrid format.
+
+**Parameters**:
+- `spec` (TaskSpecification): MiniGrid format task
+
+**Returns**:
+- `dict`: MultiGrid format task specification
+
+**Conversion Details**:
+
+```python
+# MiniGrid format
+{
+    "maze": {
+        "dimensions": [8, 8],
+        "start": [1, 1],
+        "goal": [6, 6],
+        "walls": [[3, 3], [3, 4]]
+    },
+    "mechanisms": {
+        "keys": [{"id": "key1", "position": [2, 2], "color": "red"}],
+        "doors": [{"id": "door1", "position": [4, 4], "requires_key": "red"}],
+        "blocks": [{"id": "block1", "position": [3, 5], "color": "grey"}]
+    }
+}
+
+# Converts to MultiGrid format
+{
+    "tiling": {
+        "type": "hex",  # From backend.tiling_type
+        "grid_size": {"width": 8, "height": 8}
+    },
+    "scene": {
+        "agent": {
+            "position": {"x": 0.125, "y": 0.125},  # 1/8, 1/8
+            "facing": 0
+        },
+        "objects": [
+            {
+                "id": "key1",
+                "type": "movable",
+                "color": "red",
+                "position": {"x": 0.25, "y": 0.25}  # 2/8, 2/8
+            },
+            {
+                "id": "door1",
+                "type": "wall",
+                "color": "red",
+                "position": {"x": 0.5, "y": 0.5}  # 4/8, 4/8
+            },
+            {
+                "id": "block1",
+                "type": "movable",
+                "color": "grey",
+                "position": {"x": 0.375, "y": 0.625}  # 3/8, 5/8
+            }
+        ],
+        "walls": [[3, 3], [3, 4]]  # Kept as absolute coordinates
+    },
+    "goal": {
+        "type": "reach_position",
+        "target": {"x": 0.75, "y": 0.75}  # 6/8, 6/8
+    },
+    "limits": {
+        "max_steps": 100
+    }
+}
+```
+
+**Object Type Mapping**:
+- Keys → `"movable"` (can be picked up)
+- Doors → `"wall"` (blocking barrier with color)
+- Blocks → `"movable"` (pushable)
+- Switches → Not yet supported
+- Gates → Not yet supported
+
+**Limitations**:
+- Switches and gates not implemented in MultiGrid
+- Teleporters not supported
+- Hazards not supported
+- All mechanisms except reach_position goals are limited
+
+### Method: `_build_grid_state()`
+
+Internal method that extracts GridState from MultiGrid environment.
+
+**Returns**:
+- `GridState`: Backend-agnostic state representation
+
+**Extraction Process**:
+
+1. **Agent Position**: Convert from cell_id → normalized coordinates → grid coordinates
+2. **Agent Carrying**: Extract from `state.agent.holding`
+3. **Block Positions**: Iterate through `state.objects` and convert positions
+4. **Goal State**: Check `state.check_goal()`
+
+**Coordinate Conversion**:
+
+```python
+# MultiGrid stores positions as cell IDs in the tiling
+cell_id = state.agent.cell_id
+
+# Convert to normalized [0,1] coordinates
+normalized_pos = tiling.cell_to_canonical(cell_id)
+# normalized_pos = (0.375, 0.625)
+
+# Convert to grid coordinates
+grid_pos = (
+    int(normalized_pos[0] * grid_width),
+    int(normalized_pos[1] * grid_height)
+)
+# grid_pos = (3, 5) for 8×8 grid
+```
+
+**Example Output**:
+```python
+state = backend.get_state()
+# GridState(
+#     agent_position=(3, 5),
+#     agent_direction=2,
+#     agent_carrying="key1",
+#     step_count=15,
+#     max_steps=100,
+#     block_positions={"block1": (4, 6)},
+#     goal_reached=False
+# )
+```
+
+---
+
+## Task Specification Conversion
+
+### Coordinate Normalization
+
+**Why Normalize?**
+
+Different tilings have different spatial properties:
+- Square: 4 neighbors, regular spacing
+- Hex: 6 neighbors, 60° angles
+- Triangle: Variable neighbors, complex topology
+
+Normalized coordinates abstract over these differences, allowing the "same" task on different tilings.
+
+**Example**:
+
+```python
+# Task: Agent at (2, 3), goal at (6, 7) in 8×8 grid
+
+# Square tiling: 4 steps right, 4 steps down = 8 steps minimum
+# Hex tiling: Can move diagonally, ~6 steps minimum
+# Triangle tiling: Complex, depends on orientation
+
+# Normalized positions allow all three to work:
+# Agent: (0.25, 0.375)
+# Goal: (0.75, 0.875)
+```
+
+**Normalization Formula**:
+
+```python
+x_normalized = x_grid / grid_width
+y_normalized = y_grid / grid_height
+
+# Example: Position (3, 5) in 8×8 grid
+# x_norm = 3 / 8 = 0.375
+# y_norm = 5 / 8 = 0.625
+```
+
+**Denormalization** (for GridState extraction):
+
+```python
+x_grid = int(x_normalized * grid_width)
+y_grid = int(y_normalized * grid_height)
+
+# Example: Normalized (0.375, 0.625) in 8×8 grid
+# x_grid = int(0.375 * 8) = 3
+# y_grid = int(0.625 * 8) = 5
+```
+
+### Object Type Unification
+
+MiniGrid has separate lists for different mechanism types. MultiGrid uses a unified objects list with a `type` field.
+
+**Mapping**:
+
+| MiniGrid Mechanism | MultiGrid Type | Notes |
+|--------------------|----------------|-------|
+| `keys` | `"movable"` | Can be picked up and carried |
+| `doors` | `"wall"` | Blocking barrier (unlock not implemented) |
+| `blocks` | `"movable"` | Pushable objects |
+| `switches` | N/A | Not yet supported |
+| `gates` | N/A | Not yet supported |
+| `teleporters` | N/A | Not yet supported |
+| `hazards` | N/A | Not yet supported |
+
+**Example Conversion**:
+
+```python
+# MiniGrid: Separate lists
+"mechanisms": {
+    "keys": [
+        {"id": "k1", "position": [2, 2], "color": "red"},
+        {"id": "k2", "position": [3, 3], "color": "blue"}
+    ],
+    "doors": [
+        {"id": "d1", "position": [5, 5], "requires_key": "red"}
+    ],
+    "blocks": [
+        {"id": "b1", "position": [4, 4], "color": "grey"}
+    ]
+}
+
+# MultiGrid: Unified objects list
+"scene": {
+    "objects": [
+        {"id": "k1", "type": "movable", "color": "red",
+         "position": {"x": 0.25, "y": 0.25}},
+        {"id": "k2", "type": "movable", "color": "blue",
+         "position": {"x": 0.375, "y": 0.375}},
+        {"id": "d1", "type": "wall", "color": "red",
+         "position": {"x": 0.625, "y": 0.625}},
+        {"id": "b1", "type": "movable", "color": "grey",
+         "position": {"x": 0.5, "y": 0.5}}
+    ]
+}
+```
+
+### Goal Specification
+
+MultiGrid supports multiple goal types with slight differences in format.
+
+**Supported Goals**:
+
+1. **Reach Position**:
+```python
+# MiniGrid
+"goal": {
+    "goal_type": "reach_position",
+    "target": [6, 6]
+}
+
+# MultiGrid
+"goal": {
+    "type": "reach_position",
+    "target": {"x": 0.75, "y": 0.75}  # Normalized
+}
+```
+
+2. **Collect All**:
+```python
+# MiniGrid
+"goal": {
+    "goal_type": "collect_all",
+    "target_ids": ["key1", "key2"]
+}
+
+# MultiGrid
+"goal": {
+    "type": "collect_all",
+    "target_ids": ["key1", "key2"]
+}
+```
+
+3. **Push Block To**:
+```python
+# MiniGrid
+"goal": {
+    "goal_type": "push_block_to",
+    "target_ids": ["block1"],
+    "target_positions": [[7, 7]]
+}
+
+# MultiGrid
+"goal": {
+    "type": "push_block_to",
+    "target_ids": ["block1"],
+    "target_positions": [{"x": 0.875, "y": 0.875}]
+}
+```
+
+---
+
+## Usage Examples
+
+### Example 1: Square vs Hex Comparison
+
+```python
+from gridworld.backends import MultiGridBackend
+from gridworld.task_spec import TaskSpecification
+
+# Load a navigation task
+spec = TaskSpecification.from_json("tasks/navigation_8x8.json")
+
+# Evaluate on square grid
+square_backend = MultiGridBackend(tiling="square")
+square_backend.configure(spec)
+obs, state, info = square_backend.reset(seed=42)
+
+# Count steps to goal
+steps_square = 0
+done = False
+while not done:
+    action = policy(obs)
+    obs, reward, terminated, truncated, state, info = square_backend.step(action)
+    steps_square += 1
+    done = terminated or truncated
+
+print(f"Square grid: {steps_square} steps")
+
+# Evaluate on hexagonal grid
+hex_backend = MultiGridBackend(tiling="hex")
+hex_backend.configure(spec)
+obs, state, info = hex_backend.reset(seed=42)
+
+steps_hex = 0
+done = False
+while not done:
+    action = policy(obs)
+    obs, reward, terminated, truncated, state, info = hex_backend.step(action)
+    steps_hex += 1
+    done = terminated or truncated
+
+print(f"Hexagonal grid: {steps_hex} steps")
+print(f"Difference: {abs(steps_square - steps_hex)} steps")
+```
+
+### Example 2: Multi-Tiling Evaluation
+
+```python
+from gridworld.backends import MultiGridBackend
+from gridworld.task_spec import TaskSpecification
+
+def evaluate_across_tilings(policy_fn, task_path, tilings=["square", "hex", "triangle"]):
+    """
+    Evaluate a policy on the same task across different tilings.
+    """
+    spec = TaskSpecification.from_json(task_path)
+
+    results = {}
+    for tiling_type in tilings:
+        backend = MultiGridBackend(tiling=tiling_type)
+        backend.configure(spec)
+
+        # Run episode
+        obs, state, info = backend.reset(seed=42)
+        done = False
+        total_reward = 0
+        steps = 0
+
+        while not done:
+            action = policy_fn(obs)
+            obs, reward, terminated, truncated, state, info = backend.step(action)
+            total_reward += reward
+            steps += 1
+            done = terminated or truncated
+
+        results[tiling_type] = {
+            "success": state.goal_reached,
+            "reward": total_reward,
+            "steps": steps
+        }
+
+        backend.close()
+
+    return results
+
+# Example usage
+results = evaluate_across_tilings(my_policy, "task.json")
+for tiling, metrics in results.items():
+    print(f"{tiling:10s}: success={metrics['success']}, "
+          f"steps={metrics['steps']}, reward={metrics['reward']:.3f}")
+```
+
+### Example 3: Visualization of Different Tilings
+
+```python
+from gridworld.backends import MultiGridBackend
+from gridworld.task_spec import TaskSpecification
+import matplotlib.pyplot as plt
+
+# Load task
+spec = TaskSpecification.from_json("task.json")
+
+# Create backends for each tiling
+tilings = ["square", "hex", "triangle"]
+backends = {t: MultiGridBackend(tiling=t) for t in tilings}
+
+# Configure and reset
+for tiling, backend in backends.items():
+    backend.configure(spec)
+    backend.reset(seed=42)
+
+# Visualize
+fig, axes = plt.subplots(1, 3, figsize=(15, 5))
+for ax, tiling in zip(axes, tilings):
+    backend = backends[tiling]
+    img = backend.render()
+    ax.imshow(img)
+    ax.set_title(f"{tiling.capitalize()} Tiling")
+    ax.axis('off')
+
+plt.tight_layout()
+plt.savefig("tiling_comparison.png")
+plt.show()
+
+# Cleanup
+for backend in backends.values():
+    backend.close()
+```
+
+### Example 4: Custom Task on Hex Grid
+
+```python
+from gridworld.backends import MultiGridBackend
+
+# Define task programmatically
+task_data = {
+    "task_id": "hex_navigation",
+    "seed": 42,
+    "difficulty_tier": 1,
+    "max_steps": 50,
+    "maze": {
+        "dimensions": [8, 8],
+        "start": [1, 1],
+        "goal": [6, 6],
+        "walls": [[3, 3], [3, 4], [4, 3]]  # Small obstacle
+    },
+    "mechanisms": {
+        "keys": [],
+        "doors": [],
+        "blocks": []
+    },
+    "goal": {
+        "type": "reach_position",
+        "target": [6, 6]
+    }
+}
+
+# Load on hexagonal grid
+backend = MultiGridBackend(tiling="hex")
+spec = TaskSpecification.from_dict(task_data)
+backend.configure(spec)
+
+# Run episode
+obs, state, info = backend.reset()
+print(f"Mission: {backend.get_mission_text()}")
+print(f"Agent starts at: {state.agent_position}")
+
+# Take some actions
+for action in [2, 2, 1, 2, 2]:  # forward, forward, turn_right, forward, forward
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+    print(f"Position: {state.agent_position}, Direction: {state.agent_direction}")
+
+    if terminated:
+        if reward > 0:
+            print("Goal reached!")
+        break
+
+backend.close()
+```
+
+### Example 5: Action Space Verification
+
+```python
+from gridworld.backends import MiniGridBackend, MultiGridBackend
+from gridworld.task_spec import TaskSpecification
+
+# Load task
+spec = TaskSpecification.from_json("task.json")
+
+# Create both backends
+minigrid = MiniGridBackend()
+multigrid = MultiGridBackend(tiling="square")
+
+minigrid.configure(spec)
+multigrid.configure(spec)
+
+# Reset with same seed
+obs1, state1, _ = minigrid.reset(seed=42)
+obs2, state2, _ = multigrid.reset(seed=42)
+
+print("Initial states:")
+print(f"  MiniGrid: pos={state1.agent_position}, dir={state1.agent_direction}")
+print(f"  MultiGrid: pos={state2.agent_position}, dir={state2.agent_direction}")
+
+# Execute same actions
+actions = [2, 2, 1, 2]  # forward, forward, turn_right, forward
+for action in actions:
+    obs1, r1, t1, tr1, state1, _ = minigrid.step(action)
+    obs2, r2, t2, tr2, state2, _ = multigrid.step(action)
+
+    print(f"\nAfter action {action}:")
+    print(f"  MiniGrid: pos={state1.agent_position}")
+    print(f"  MultiGrid: pos={state2.agent_position}")
+
+    # Positions should match (for square tiling)
+    assert state1.agent_position == state2.agent_position, "Position mismatch!"
+
+print("\n✓ Action space translation verified!")
+
+minigrid.close()
+multigrid.close()
+```
+
+---
+
+## Feature Support and Limitations
+
+### Tiling Support
+
+| Tiling | Status | Notes |
+|--------|--------|-------|
+| Square | ✓ Full | Same as MiniGrid |
+| Hexagonal | ✓ Experimental | 6-connected, 60° angles |
+| Triangular | ✓ Experimental | Complex topology, variable connectivity |
+
+### Mechanism Support
+
+| Mechanism | Status | Notes |
+|-----------|--------|-------|
+| Walls | ✓ Supported | Static barriers |
+| Keys | Partial | Can be placed, but pickup may not work correctly |
+| Doors | ✗ Limited | Rendered as colored walls, no unlock mechanic |
+| Switches | ✗ Not implemented | MultiGrid enhancement needed |
+| Gates | ✗ Not implemented | MultiGrid enhancement needed |
+| Blocks | Partial | Rendered, but push mechanic unverified |
+| Hazards | ✗ Not implemented | No hazard support in MultiGrid |
+| Teleporters | ✗ Not implemented | Planned feature |
+
+### Goal Support
+
+| Goal Type | Status | Implementation |
+|-----------|--------|----------------|
+| Reach Position | ✓ Supported | Fully functional |
+| Collect All | ⚠️ Partial | Goal spec converted, checking may not work |
+| Push Block To | ⚠️ Partial | Goal spec converted, checking may not work |
+| Survive Steps | ⚠️ Partial | Can be specified, but no special handling |
+
+**Legend**: ✓ Full support | ⚠️ Partial support | ✗ Not supported
+
+### Known Limitations
+
+1. **Mechanism Interactivity**: Many mechanisms (doors, switches, gates) are not yet implemented in the underlying MultiGrid environment. They may be converted and placed but won't function.
+
+2. **Coordinate Precision**: Integer-to-normalized conversion can lose precision:
+   ```python
+   # Original: (3, 5) in 8×8 grid
+   # Normalized: (0.375, 0.625)
+   # Back to grid: (3, 5)  ✓ OK
+
+   # Original: (7, 7) in 8×8 grid
+   # Normalized: (0.875, 0.875)
+   # Back to grid: (7, 7)  ✓ OK
+
+   # But for odd dimensions:
+   # Original: (3, 5) in 7×7 grid
+   # Normalized: (0.428571, 0.714286)
+   # Back to grid: (2, 4)  ✗ Precision loss!
+   ```
+   **Recommendation**: Use power-of-2 dimensions (8×8, 16×16) for exact conversion.
+
+3. **Rendering Quality**: MultiGrid rendering is experimental. Hex and triangle tilings may have visual artifacts.
+
+4. **Performance**: MultiGrid is ~1.5× slower than MiniGrid due to coordinate conversions and less optimized implementation.
+
+5. **Partial Observability**: Not yet implemented. All observations are full-grid.
+
+---
+
+## Performance Characteristics
+
+### Timing Benchmarks (8×8 grid, square tiling)
+
+| Operation | MiniGrid | MultiGrid | Overhead |
+|-----------|----------|-----------|----------|
+| configure() | ~0.1 ms | ~5 ms | 50× |
+| reset() | ~10 ms | ~15 ms | 1.5× |
+| step() | ~3 ms | ~5 ms | 1.67× |
+| render() | ~4 ms | ~8 ms | 2× |
+
+**Total episode (100 steps)**: ~600-800 ms (vs ~400 ms for MiniGrid)
+
+### Hexagonal and Triangle Tilings
+
+Exotic tilings add additional overhead:
+
+| Tiling | Episode Time | Relative to Square |
+|--------|--------------|-------------------|
+| Square | ~600 ms | 1.0× |
+| Hex | ~750 ms | 1.25× |
+| Triangle | ~900 ms | 1.5× |
+
+**Bottlenecks**:
+1. Cell ID ↔ normalized coordinate conversion
+2. Neighbor computation for non-square tilings
+3. Rendering complex tiling shapes
+
+---
+
+## Comparison with MiniGrid Backend
+
+| Aspect | MiniGridBackend | MultiGridBackend |
+|--------|-----------------|------------------|
+| **Maturity** | Production-ready | Experimental |
+| **Tilings** | Square only | Square, hex, triangle |
+| **Mechanisms** | Full support | Limited (keys/walls only) |
+| **Performance** | Fast (~400ms/episode) | Slower (~600-900ms/episode) |
+| **Rendering** | High quality | Experimental quality |
+| **Partial Obs** | Supported | Not yet |
+| **Backend Source** | Gymnasium MiniGrid | Custom MultiGrid |
+| **Use Case** | Standard evaluation | Research on exotic tilings |
+| **Stability** | Stable | May have bugs |
+| **Documentation** | Comprehensive | Limited |
+
+**When to Use MultiGrid**:
+- Research on spatial representation and topology
+- Investigating agent generalization across grid types
+- Exploring hexagonal or triangular navigation
+
+**When to Use MiniGrid**:
+- Production evaluation
+- Need full mechanism support
+- Performance is critical
+- Stability and maturity required
+
+---
+
+## Integration with Evaluation Pipeline
+
+### Standard Evaluation Pattern
+
+```python
+from gridworld.backends import MultiGridBackend
+from gridworld.task_spec import TaskSpecification
+
+def run_multigrid_evaluation(agent, task_files, tiling="square"):
+    """
+    Evaluation loop using MultiGrid backend.
+    """
+    backend = MultiGridBackend(tiling=tiling, render_mode="rgb_array")
+    results = {}
+
+    for task_file in task_files:
+        spec = TaskSpecification.from_json(task_file)
+        backend.configure(spec)
+
+        # Run episode
+        obs, state, info = backend.reset(seed=42)
+        episode_data = {
+            "tiling": tiling,
+            "observations": [obs],
+            "actions": [],
+            "rewards": []
+        }
+
+        done = False
+        while not done:
+            action = agent.predict(obs)
+            obs, reward, terminated, truncated, state, info = backend.step(action)
+
+            episode_data["observations"].append(obs)
+            episode_data["actions"].append(action)
+            episode_data["rewards"].append(reward)
+            done = terminated or truncated
+
+        episode_data["success"] = state.goal_reached
+        episode_data["total_reward"] = sum(episode_data["rewards"])
+        episode_data["steps"] = len(episode_data["actions"])
+
+        results[spec.task_id] = episode_data
+
+    backend.close()
+    return results
+```
+
+### Cross-Backend Comparison
+
+```python
+from gridworld.backends import MiniGridBackend, MultiGridBackend
+
+def compare_backends(agent, task_path):
+    """
+    Compare agent performance on MiniGrid vs MultiGrid (square).
+    """
+    spec = TaskSpecification.from_json(task_path)
+
+    # MiniGrid
+    mg_backend = MiniGridBackend()
+    mg_backend.configure(spec)
+    obs, state, _ = mg_backend.reset(seed=42)
+
+    mg_steps = 0
+    done = False
+    while not done:
+        action = agent.predict(obs)
+        obs, reward, terminated, truncated, state, _ = mg_backend.step(action)
+        mg_steps += 1
+        done = terminated or truncated
+
+    mg_success = state.goal_reached
+    mg_backend.close()
+
+    # MultiGrid
+    mu_backend = MultiGridBackend(tiling="square")
+    mu_backend.configure(spec)
+    obs, state, _ = mu_backend.reset(seed=42)
+
+    mu_steps = 0
+    done = False
+    while not done:
+        action = agent.predict(obs)
+        obs, reward, terminated, truncated, state, _ = mu_backend.step(action)
+        mu_steps += 1
+        done = terminated or truncated
+
+    mu_success = state.goal_reached
+    mu_backend.close()
+
+    return {
+        "minigrid": {"success": mg_success, "steps": mg_steps},
+        "multigrid": {"success": mu_success, "steps": mu_steps}
+    }
+```
+
+---
+
+## Troubleshooting
+
+### Issue 1: ImportError for MultiGrid
+
+**Error**: `ModuleNotFoundError: No module named 'multigrid'`
+
+**Cause**: MultiGrid module not in Python path
+
+**Solution**:
+```python
+# The backend handles this automatically via sys.path manipulation
+# But if you see this error, check:
+import sys
+from pathlib import Path
+
+multigrid_path = Path(__file__).parent.parent.parent / "multigrid"
+if str(multigrid_path.parent) not in sys.path:
+    sys.path.insert(0, str(multigrid_path.parent))
+```
+
+### Issue 2: Coordinate Mismatch
+
+**Symptom**: Agent/objects appear at wrong positions
+
+**Cause**: Coordinate normalization precision loss
+
+**Solution**: Use power-of-2 dimensions (8×8, 16×16, 32×32)
+
+### Issue 3: Mechanisms Not Working
+
+**Symptom**: Keys can't be picked up, doors don't open
+
+**Cause**: Mechanism interaction not yet implemented in MultiGrid
+
+**Solution**: Currently, MultiGrid backend is best for navigation-only tasks. For tasks requiring mechanisms, use MiniGridBackend.
+
+### Issue 4: Rendering Artifacts on Hex/Triangle
+
+**Symptom**: Visual glitches in rendered images
+
+**Cause**: Experimental rendering code
+
+**Solution**: This is a known limitation. For publication-quality visualizations, use square tiling or generate custom renders.
+
+---
+
+## Future Enhancements
+
+### Planned Features
+
+1. **Full Mechanism Support**:
+   - Implement switches and gates in MultiGrid
+   - Add door unlock mechanic
+   - Add hazard tiles
+
+2. **Partial Observability**:
+   - Limited agent field of view
+   - Fog of war
+   - Memory-dependent tasks
+
+3. **Improved Rendering**:
+   - High-quality hex/triangle tile graphics
+   - Customizable visual themes
+   - Animation support
+
+4. **Performance Optimization**:
+   - Cache coordinate conversions
+   - Optimize neighbor lookups for exotic tilings
+   - Vectorized rendering
+
+5. **Additional Tilings**:
+   - Octagonal + square (Islamic tiling)
+   - Penrose tiling (aperiodic)
+   - Voronoi diagrams
+
+### Research Directions
+
+- **Topology Invariance**: Do agents learn topology-invariant navigation strategies?
+- **Transfer Learning**: Does training on hex grids improve performance on square grids?
+- **Spatial Reasoning**: How do different tilings affect spatial reasoning tasks?
+
+---
+
+## See Also
+
+- [MiniGrid Backend Documentation](./minigrid_backend.md): Production backend for standard tasks
+- [Task Parser Documentation](./task_parser.md): How tasks are parsed
+- [AbstractGridBackend Interface](../gridworld/backends/base.py): Backend interface specification
+- [MultiGrid Environment](../multigrid/env.py): Underlying custom environment
+- [Tiling Theory](../../docs/tiling_theory.md): Mathematical background on grid tilings
diff --git a/src/v1_1/docs/task_parser.md b/src/v1_1/docs/task_parser.md
new file mode 100644
index 00000000..a77caaa4
--- /dev/null
+++ b/src/v1_1/docs/task_parser.md
@@ -0,0 +1,630 @@
+# Task Parser Documentation
+
+## Overview
+
+The Task Parser is a critical component of the MiniGrid evaluation framework that transforms declarative JSON task specifications into fully configured, executable MiniGrid environments. It acts as the bridge between high-level task definitions and low-level environment instantiation.
+
+**Purpose**: Enable researchers and evaluators to define gridworld puzzles in a human-readable JSON format without needing to write Python code or understand MiniGrid internals.
+
+**Location**: `/src/v1_1/gridworld/task_parser.py`
+
+**Key Classes**:
+- `TaskParser`: Main parser class that orchestrates environment creation
+- Helper functions: `load_task_from_file()`, `load_task_from_dict()`
+
+---
+
+## Architecture
+
+### Design Philosophy
+
+The Task Parser follows a three-phase architecture:
+
+1. **Validation Phase**: Verify task specification correctness
+2. **Environment Creation Phase**: Instantiate and initialize the base environment
+3. **Population Phase**: Add task-specific objects to the grid
+
+This separation ensures that errors are caught early (validation) before expensive environment creation, and that initialization order is handled correctly (creation before population).
+
+### Component Interaction
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                     Task Parser Flow                         │
+└─────────────────────────────────────────────────────────────┘
+
+JSON File                    TaskSpecification
+   or                               │
+Dictionary                          │
+    │                               │
+    └──────────┬────────────────────┘
+               │
+               ▼
+        ┌─────────────┐
+        │TaskParser   │
+        │  .parse()   │
+        └──────┬──────┘
+               │
+               ├──► 1. Validate Specification
+               │       - Bounds checking
+               │       - Dependency validation
+               │       - Consistency checks
+               │
+               ├──► 2. Create Environment
+               │       - Instantiate CustomMiniGridEnv
+               │       - Call reset() to initialize grid
+               │       - Set up border walls
+               │
+               └──► 3. Populate Grid
+                       - Add interior walls
+                       - Place goal marker
+                       - Add keys (collectible items)
+                       - Add doors (barriers)
+                       - Add gates (must come before switches!)
+                       - Add switches (control gates)
+                       - Add blocks (pushable)
+                       - Add hazards (lava, pits)
+                       - Set agent position (last!)
+                       │
+                       ▼
+              CustomMiniGridEnv
+              (Ready for use)
+```
+
+### Critical Design Decisions
+
+#### 1. Why Reset Inside Parser?
+
+The `TaskParser.parse()` method calls `env.reset()` internally. This might seem odd since backends also have a `reset()` method. The rationale:
+
+- **Grid Initialization**: MiniGrid requires `reset()` to be called before the grid can be populated. The `_gen_grid()` method (called by `reset()`) creates the grid structure and adds border walls.
+- **Single Responsibility**: The parser is responsible for creating a *fully configured* environment. Calling reset outside would require the caller to know about this implementation detail.
+- **Avoids Double Reset**: Backend `reset()` methods call `parser.parse()`, which already resets. If the backend also called `env.reset()`, it would wipe out all placed objects.
+
+```python
+# WRONG: This would wipe out all objects!
+env = parser.parse(task_spec)
+env.reset()  # ← Don't do this!
+
+# CORRECT: Parser handles reset internally
+env = parser.parse(task_spec)
+# Environment is ready to use
+```
+
+#### 2. Object Placement Order
+
+The `_populate_grid()` method places objects in a specific order to handle dependencies:
+
+1. **Clear interior** (preserve border walls)
+2. **Walls** (static barriers)
+3. **Goal** (win condition marker)
+4. **Keys** (collectible items)
+5. **Doors** (barriers that require keys)
+6. **Gates** (barriers controlled by switches) ← Must come before switches
+7. **Switches** (controls that toggle gates)
+8. **Blocks** (pushable objects)
+9. **Hazards** (lava, pits, spikes)
+10. **Agent position** (always last to ensure correct spawn)
+
+**Why gates before switches?** Switches store references to gate IDs and validate them during placement. If switches are placed first, they'll fail to find their target gates.
+
+**Why agent position last?** If the task specification accidentally places an object at the agent's start position, placing the agent last ensures it spawns correctly anyway.
+
+---
+
+## Key Components
+
+### TaskParser Class
+
+```python
+class TaskParser:
+    """
+    Parse TaskSpecification and create configured MiniGrid environments.
+    """
+
+    def __init__(self, render_mode: Optional[str] = None)
+    def parse(self, spec: TaskSpecification, seed: Optional[int] = None) -> CustomMiniGridEnv
+    def parse_file(self, path: Union[str, Path]) -> CustomMiniGridEnv
+    def parse_dict(self, data: dict) -> CustomMiniGridEnv
+    def _populate_grid(self, env: CustomMiniGridEnv, spec: TaskSpecification)
+```
+
+#### Constructor: `__init__(render_mode)`
+
+**Parameters**:
+- `render_mode` (str, optional): Rendering mode for created environments
+  - `"human"`: Opens a window for human viewing
+  - `"rgb_array"`: Returns RGB numpy arrays (for headless evaluation)
+  - `None`: No rendering (fastest)
+
+**Example**:
+```python
+# For headless server evaluation
+parser = TaskParser(render_mode="rgb_array")
+
+# For interactive debugging
+parser = TaskParser(render_mode="human")
+```
+
+#### Method: `parse(spec, seed=None)`
+
+The core parsing method. Transforms a TaskSpecification into a configured environment.
+
+**Parameters**:
+- `spec` (TaskSpecification): The task to parse
+- `seed` (int, optional): Random seed override. If None, uses `spec.seed`
+
+**Returns**:
+- `CustomMiniGridEnv`: Configured and ready-to-use environment
+
+**Raises**:
+- `ValueError`: If the task specification fails validation
+
+**Example**:
+```python
+from gridworld.task_spec import TaskSpecification
+from gridworld.task_parser import TaskParser
+
+# Load specification
+spec = TaskSpecification.from_json("task_001.json")
+
+# Create parser and parse
+parser = TaskParser(render_mode="rgb_array")
+env = parser.parse(spec, seed=42)
+
+# Environment is ready to use
+obs, info = env.reset()
+```
+
+#### Method: `parse_file(path)`
+
+Convenience method that loads a JSON file and parses it.
+
+**Parameters**:
+- `path` (str or Path): Path to JSON task specification file
+
+**Returns**:
+- `CustomMiniGridEnv`: Configured environment
+
+**Example**:
+```python
+parser = TaskParser()
+env = parser.parse_file("tasks/navigation/task_001.json")
+```
+
+#### Method: `parse_dict(data)`
+
+Convenience method that parses a dictionary (e.g., loaded from JSON or constructed programmatically).
+
+**Parameters**:
+- `data` (dict): Dictionary containing task specification
+
+**Returns**:
+- `CustomMiniGridEnv`: Configured environment
+
+**Example**:
+```python
+import json
+
+with open("task.json") as f:
+    data = json.load(f)
+
+parser = TaskParser()
+env = parser.parse_dict(data)
+```
+
+### Helper Functions
+
+#### `load_task_from_file(path, render_mode=None)`
+
+Top-level convenience function for the most common use case: loading a task from a JSON file.
+
+**Parameters**:
+- `path` (str or Path): Path to JSON file
+- `render_mode` (str, optional): Rendering mode
+
+**Returns**:
+- `CustomMiniGridEnv`: Configured environment
+
+**Example**:
+```python
+from gridworld.task_parser import load_task_from_file
+
+# One-liner to load and parse
+env = load_task_from_file("task.json", render_mode="rgb_array")
+```
+
+#### `load_task_from_dict(data, render_mode=None)`
+
+Top-level convenience function for loading from a dictionary.
+
+**Parameters**:
+- `data` (dict): Task specification dictionary
+- `render_mode` (str, optional): Rendering mode
+
+**Returns**:
+- `CustomMiniGridEnv`: Configured environment
+
+---
+
+## Usage Examples
+
+### Example 1: Basic Navigation Task
+
+```python
+from gridworld.task_parser import load_task_from_file
+
+# Load a simple navigation task
+env = load_task_from_file("tasks/tier1/navigate_8x8.json")
+
+# Run episode
+obs, info = env.reset()
+done = False
+total_reward = 0
+
+while not done:
+    # Simple random policy
+    action = env.action_space.sample()
+    obs, reward, terminated, truncated, info = env.step(action)
+    total_reward += reward
+    done = terminated or truncated
+
+print(f"Episode finished with reward: {total_reward}")
+```
+
+### Example 2: Key-Door Puzzle
+
+```python
+from gridworld.task_parser import TaskParser
+from gridworld.task_spec import TaskSpecification
+
+# Load task specification
+spec = TaskSpecification.from_json("tasks/tier2/key_door_puzzle.json")
+
+# Create parser with rendering for debugging
+parser = TaskParser(render_mode="human")
+
+# Parse with specific seed for reproducibility
+env = parser.parse(spec, seed=123)
+
+# Environment contains:
+# - Keys at specified positions
+# - Locked doors matching key colors
+# - Agent must collect key, unlock door, reach goal
+```
+
+### Example 3: Switch-Gate Mechanism
+
+```python
+from gridworld.task_parser import load_task_from_dict
+
+# Programmatically define a task
+task_data = {
+    "task_id": "custom_switch_gate",
+    "seed": 42,
+    "difficulty_tier": 3,
+    "max_steps": 100,
+    "maze": {
+        "dimensions": [8, 8],
+        "walls": [[3, 3], [3, 4], [3, 5]],
+        "start": [1, 1],
+        "goal": [6, 6]
+    },
+    "mechanisms": {
+        "switches": [{
+            "id": "sw1",
+            "position": [2, 4],
+            "controls": ["gate1"],
+            "switch_type": "toggle"
+        }],
+        "gates": [{
+            "id": "gate1",
+            "position": [4, 4],
+            "initial_state": "closed"
+        }]
+    },
+    "goal": {
+        "type": "reach_position",
+        "target": [6, 6]
+    }
+}
+
+# Load from dictionary
+env = load_task_from_dict(task_data, render_mode="rgb_array")
+
+# Agent must toggle switch to open gate, then reach goal
+```
+
+### Example 4: Evaluation Loop with Multiple Seeds
+
+```python
+from gridworld.task_parser import TaskParser
+from gridworld.task_spec import TaskSpecification
+
+# Load task once
+spec = TaskSpecification.from_json("task.json")
+parser = TaskParser(render_mode="rgb_array")
+
+# Evaluate with multiple seeds
+results = []
+for seed in range(10):
+    env = parser.parse(spec, seed=seed)
+
+    # Run episode
+    obs, info = env.reset()
+    done = False
+    steps = 0
+    success = False
+
+    while not done and steps < 100:
+        action = my_policy(obs)  # Your agent policy
+        obs, reward, terminated, truncated, info = env.step(action)
+        steps += 1
+        done = terminated or truncated
+        if terminated and reward > 0:
+            success = True
+
+    results.append({
+        "seed": seed,
+        "success": success,
+        "steps": steps
+    })
+
+# Analyze results
+success_rate = sum(r["success"] for r in results) / len(results)
+print(f"Success rate: {success_rate:.1%}")
+```
+
+---
+
+## Object Placement Rules
+
+### Walls
+
+- **Type**: Static barriers
+- **Placement**: Skip border positions (already have walls from reset)
+- **Constraints**: Cannot overlap with start or goal positions (validated by TaskSpecification)
+
+```python
+# Walls are added to interior cells only
+for wall_pos in spec.maze.walls:
+    if 0 < x < width - 1 and 0 < y < height - 1:
+        env.place_wall(x, y)
+```
+
+### Keys
+
+- **Type**: Collectible items
+- **Placement**: Added as pickupable objects on the grid
+- **Colors**: "red", "blue", "green", "yellow", "purple", "grey"
+- **Mechanics**: Can be picked up and used to unlock matching doors
+
+```python
+for key in spec.mechanisms.keys:
+    env.place_key(key.position.x, key.position.y, key.color)
+```
+
+### Doors
+
+- **Type**: Barriers that require keys to unlock
+- **Placement**: Added as locked or unlocked doors
+- **Colors**: Must match a key color in the task
+- **Mechanics**: Agent with matching key can unlock and open
+
+```python
+for door in spec.mechanisms.doors:
+    is_locked = door.initial_state == "locked"
+    env.place_door(door.position.x, door.position.y,
+                   door.requires_key, is_locked)
+```
+
+### Gates and Switches
+
+- **Type**: Remote-controlled barriers
+- **Placement**: Gates first, then switches (dependency!)
+- **Mechanics**: Toggling a switch changes state of all controlled gates
+- **Dependency**: Switches reference gate IDs, so gates must exist first
+
+```python
+# Place gates first
+for gate in spec.mechanisms.gates:
+    is_open = gate.initial_state == "open"
+    env.place_gate(gate.position.x, gate.position.y, gate.id, is_open)
+
+# Then place switches that control them
+for switch in spec.mechanisms.switches:
+    env.place_switch(switch.position.x, switch.position.y,
+                     switch.id, switch.controls)
+```
+
+### Blocks
+
+- **Type**: Pushable objects (Sokoban-style)
+- **Placement**: Added as Box objects
+- **Mechanics**: Agent can push blocks by moving into them
+- **Use Case**: Block puzzles, path creation
+
+```python
+for block in spec.mechanisms.blocks:
+    env.place_block(block.position.x, block.position.y,
+                    block.id, block.color)
+```
+
+### Hazards
+
+- **Type**: Dangerous tiles that end the episode
+- **Placement**: Added as Lava objects
+- **Types**: "lava", "pit", "spike" (all rendered as lava in MiniGrid)
+- **Mechanics**: Stepping on a hazard terminates the episode
+
+```python
+for hazard in spec.mechanisms.hazards:
+    env.place_hazard(hazard.position.x, hazard.position.y,
+                     hazard.hazard_type)
+```
+
+---
+
+## Validation
+
+The parser validates task specifications before environment creation. Validation catches:
+
+1. **Dimension Checks**: Minimum 3x3 grid size
+2. **Bounds Checks**: All positions within grid dimensions
+3. **Wall Conflicts**: Start/goal not on walls
+4. **Color Consistency**: Doors have matching key colors
+5. **ID References**: Switches control valid gate IDs
+6. **Tier Validity**: Difficulty tier in range [1, 5]
+7. **Max Steps**: Positive step limit
+
+**Example Validation Errors**:
+
+```python
+# Task with invalid door (no matching key)
+spec = TaskSpecification.from_dict({
+    "task_id": "broken",
+    "seed": 42,
+    "difficulty_tier": 1,
+    "max_steps": 100,
+    "maze": {
+        "dimensions": [8, 8],
+        "start": [1, 1],
+        "goal": [6, 6],
+        "walls": []
+    },
+    "mechanisms": {
+        "doors": [{
+            "id": "door1",
+            "position": [4, 4],
+            "requires_key": "red",  # No red key!
+            "initial_state": "locked"
+        }],
+        "keys": []  # Empty!
+    },
+    "goal": {"type": "reach_position", "target": [6, 6]}
+})
+
+parser = TaskParser()
+try:
+    env = parser.parse(spec)
+except ValueError as e:
+    print(e)
+    # Output: Invalid task specification: Door door1 requires color 'red'
+    #         but no key of that color exists
+```
+
+---
+
+## Integration with Backends
+
+The Task Parser is used by backend implementations (MiniGridBackend, MultiGridBackend) to create environments from task specifications.
+
+```python
+# Backend usage (simplified)
+class MiniGridBackend(AbstractGridBackend):
+    def __init__(self, render_mode="rgb_array"):
+        self.parser = TaskParser(render_mode=render_mode)
+
+    def configure(self, task_spec: TaskSpecification):
+        self.task_spec = task_spec
+
+    def reset(self, seed=None):
+        # Parser creates and populates environment
+        self.env = self.parser.parse(self.task_spec, seed=seed)
+        # Environment is ready to use
+        return self.env.render(), self._get_grid_state(), {}
+```
+
+---
+
+## Performance Considerations
+
+### Memory Usage
+
+- Each `parse()` call creates a new environment instance
+- Environments hold grid state, object references, and render buffers
+- For evaluation loops, reuse the parser but create fresh environments per seed
+
+### Computation Time
+
+Parsing is dominated by:
+1. **Grid initialization**: O(width × height) to create empty grid
+2. **Object placement**: O(num_objects) to place all mechanisms
+3. **Validation**: O(num_objects) to check consistency
+
+Typical parse time: **< 10ms** for 8x8 grid with 10-20 objects
+
+### Best Practices
+
+```python
+# GOOD: Reuse parser, create fresh environments
+parser = TaskParser(render_mode="rgb_array")
+for task_file in task_files:
+    spec = TaskSpecification.from_json(task_file)
+    env = parser.parse(spec)
+    # Use environment...
+    env.close()
+
+# AVOID: Creating parser per task (unnecessary overhead)
+for task_file in task_files:
+    parser = TaskParser(render_mode="rgb_array")  # Wasteful!
+    env = parser.parse_file(task_file)
+    # Use environment...
+```
+
+---
+
+## Common Issues and Solutions
+
+### Issue 1: Objects Disappearing After Reset
+
+**Problem**: Objects placed before `reset()` are lost.
+
+**Cause**: MiniGrid's `reset()` method calls `_gen_grid()`, which creates a fresh empty grid.
+
+**Solution**: Always place objects *after* calling `reset()`. The parser handles this correctly.
+
+```python
+# WRONG
+env = CustomMiniGridEnv(...)
+env.place_key(3, 3, "red")  # Placed before reset
+env.reset()  # Key is now gone!
+
+# CORRECT (what parser does)
+env = CustomMiniGridEnv(...)
+env.reset()  # Initialize grid
+env.place_key(3, 3, "red")  # Now the key stays
+```
+
+### Issue 2: Switch References Invalid Gate
+
+**Problem**: `ValueError` when switch controls non-existent gate.
+
+**Cause**: Gates must exist before switches are placed.
+
+**Solution**: The parser places gates before switches. Ensure your TaskSpecification has matching gate IDs.
+
+```python
+# Task spec should have:
+"mechanisms": {
+    "gates": [{"id": "gate1", ...}],
+    "switches": [{"id": "sw1", "controls": ["gate1"], ...}]
+}
+```
+
+### Issue 3: Agent Spawns in Wrong Position
+
+**Problem**: Agent not at expected start position.
+
+**Cause**: Another object placed at start position.
+
+**Solution**: Parser places agent last to overwrite any conflicts. Check your task specification for position conflicts.
+
+---
+
+## See Also
+
+- [TaskSpecification Schema](../gridworld/task_spec.py): JSON format for tasks
+- [CustomMiniGridEnv](../gridworld/custom_env.py): The environment class created by parser
+- [MiniGridBackend Documentation](./minigrid_backend.md): Integration with backend system
+- [MultiNet Task Generation Guide](../../docs/task_generation.md): Creating evaluation tasks
diff --git a/src/v1_1/docs/technical_design.md b/src/v1_1/docs/technical_design.md
new file mode 100644
index 00000000..c955f468
--- /dev/null
+++ b/src/v1_1/docs/technical_design.md
@@ -0,0 +1,1387 @@
+# Technical Design Document: MultiNet v1.1 GridWorld Framework
+
+## Document Overview
+
+This document provides the technical rationale and architectural decisions behind the MultiNet v1.1 GridWorld evaluation framework. It explains why certain technologies were chosen, how components interact, and the forward-looking vision for cross-domain evaluation.
+
+**Target Audience**: Researchers, contributors, and engineers extending the framework
+
+**Last Updated**: 2026-02-06
+
+---
+
+## Table of Contents
+
+1. [Technology Stack and Justification](#1-technology-stack-and-justification)
+2. [Why Non-Square Tilings Matter](#2-why-non-square-tilings-matter)
+3. [Architecture Decisions](#3-architecture-decisions)
+4. [Cross-Domain Vision](#4-cross-domain-vision-forward-looking)
+5. [Evaluation Methodology](#5-evaluation-methodology)
+
+---
+
+## 1. Technology Stack and Justification
+
+### 1.1 Why MiniGrid (Farama Foundation)
+
+**MiniGrid** is the production-ready backend for square grid environments, built on the mature Gymnasium (formerly OpenAI Gym) ecosystem.
+
+#### Technical Advantages
+
+**1. Maturity and Stability**
+- Actively maintained by Farama Foundation (successor to OpenAI Gym)
+- Used in hundreds of RL research papers since 2017
+- Battle-tested codebase with well-understood edge cases
+- Stable API with semantic versioning
+
+**2. Rich Feature Set**
+- 7-action discrete space: turn_left, turn_right, forward, pickup, drop, toggle, done
+- Partial observability: Agent has limited field of view (7x7 grid by default)
+- Built-in rendering: High-quality RGB visualizations and human-readable text mode
+- Standard observation types: Symbolic (grid encoding) and visual (RGB images)
+
+**3. Community and Ecosystem**
+- Large user base provides extensive examples and troubleshooting resources
+- Compatible with RL libraries: Stable-Baselines3, RLlib, CleanRL
+- Well-documented: Official docs at minigrid.farama.org
+- Active community on GitHub and Discord
+
+**4. Performance Characteristics**
+```
+Operation               Time        Memory
+------------------------------------------
+Environment creation    ~10 ms      ~50 KB
+Episode (100 steps)     ~400 ms     ~200 KB
+Observation rendering   ~3 ms       ~150 KB (64x64x3)
+```
+
+Fast enough for large-scale evaluation (1000s of episodes).
+
+#### Built-in Mechanisms
+
+MiniGrid natively supports:
+- **Keys and Doors**: Collectible keys unlock matching color-coded doors
+- **Boxes**: Pushable Sokoban-style blocks
+- **Lava**: Episode-ending hazard tiles
+- **Walls**: Static barriers for maze construction
+
+We extended MiniGrid with:
+- **Switches and Gates**: Remote-controlled barriers
+- **Goal Markers**: Explicit visual goal positions
+- **Teleporters**: Instant transport between positions (v1.2 planned)
+
+#### Limitations
+
+**1. Square-Only Topology**
+- Hardcoded 4-connected grid (N/S/E/W movement)
+- Agent direction restricted to 4 cardinal directions
+- Cannot represent hexagonal or triangular spatial relationships
+
+**2. Rigid Object System**
+- Object types are hardcoded Python classes
+- Adding new object types requires modifying core MiniGrid code
+- Limited extensibility for custom mechanisms
+
+**3. Rendering Pipeline**
+- Tile-based rendering assumes square cells
+- Cannot easily render non-square tilings
+- Sprite system optimized for 90-degree rotations
+
+**4. Distribution Shift Risk**
+- Models trained predominantly on MiniGrid may overfit to square-grid patterns
+- Success on MiniGrid doesn't guarantee understanding of spatial reasoning (see Section 2)
+
+#### When to Use MiniGrid
+
+**Recommended for:**
+- Production evaluation of agents on standard gridworld tasks
+- Tasks requiring partial observability and memory
+- Benchmarking against existing MiniGrid baselines
+- Maximum performance and stability requirements
+
+**Not suitable for:**
+- Testing topology invariance
+- Exotic tiling research (hex, triangle, Penrose, etc.)
+- Tasks requiring novel object types not in MiniGrid
+
+---
+
+### 1.2 Why MultiGrid (Custom Implementation)
+
+**MultiGrid** is an experimental backend designed for research on exotic grid tilings and spatial topology invariance.
+
+#### Core Innovation: Adjacency Graph Architecture
+
+Unlike MiniGrid's hardcoded coordinate system, MultiGrid represents grids as **adjacency graphs**:
+
+```python
+# Square tiling: Cell has 4 neighbors
+cell_neighbors = {
+    "N": cell_id + width,
+    "E": cell_id + 1,
+    "S": cell_id - width,
+    "W": cell_id - 1
+}
+
+# Hexagonal tiling: Cell has 6 neighbors
+cell_neighbors = {
+    "N": ...,   "NE": ...,
+    "SE": ...,  "S": ...,
+    "SW": ...,  "NW": ...
+}
+
+# Triangular tiling: Cell has 3 or 9 neighbors (depends on orientation)
+cell_neighbors = {
+    "APEX_UP": [...],    # Upward-pointing triangle
+    "APEX_DOWN": [...]   # Downward-pointing triangle
+}
+```
+
+This abstraction enables **tiling-agnostic algorithms**. The same pathfinding or agent logic works on any tiling without code changes.
+
+#### Key Technical Features
+
+**1. Normalized Coordinate System**
+
+All positions are stored in normalized [0,1] × [0,1] space:
+
+```python
+# Grid coordinate (3, 5) in 8×8 grid
+normalized_pos = (3/8, 5/8) = (0.375, 0.625)
+```
+
+**Why normalize?**
+- **Cross-tiling compatibility**: Same task specification works on square, hex, and triangle grids
+- **Resolution independence**: Tasks scale to different grid sizes without rewriting coordinates
+- **Domain transfer**: Same normalized coordinates can map to other domains (see Section 4)
+
+**2. Extensible Object Registry**
+
+MultiGrid uses a registry pattern for objects:
+
+```python
+class ObjectRegistry:
+    _types = {
+        "movable": MovableObject,
+        "wall": WallObject,
+        "zone": ZoneObject,
+        "teleporter": TeleporterObject
+    }
+```
+
+Adding new object types doesn't require modifying core environment code.
+
+**3. Goal Specification System**
+
+Rich goal types beyond "reach position":
+
+```python
+goals = {
+    "reach_position": {"target": (0.5, 0.5)},
+    "collect_all": {"target_ids": ["key1", "key2"]},
+    "push_block_to": {"block_id": "block1", "target": (0.7, 0.7)},
+    "survive_steps": {"min_steps": 100},
+    "zone_occupation": {"zone_id": "goal_zone", "duration": 10}
+}
+```
+
+#### Technical Tradeoffs
+
+**Advantages:**
+- Arbitrary tilings without code changes
+- Normalized coordinates enable cross-domain transfer
+- Extensible object and goal systems
+- Research-friendly architecture
+
+**Disadvantages:**
+- Immature: Fewer users, less tested
+- Slower: ~600-900ms per episode (vs 400ms for MiniGrid)
+- Incomplete: Switches/gates not yet implemented
+- No partial observability yet
+- Rendering quality variable for exotic tilings
+
+#### Performance Overhead
+
+```
+Operation               MiniGrid    MultiGrid   Overhead
+----------------------------------------------------------
+Configure task          ~0.1 ms     ~5 ms       50x
+Reset environment       ~10 ms      ~15 ms      1.5x
+Step execution          ~3 ms       ~5 ms       1.67x
+Render                  ~4 ms       ~8 ms       2x
+----------------------------------------------------------
+100-step episode        ~400 ms     ~600 ms     1.5x
+```
+
+**Bottlenecks:**
+1. Cell ID ↔ normalized coordinate conversions (happens every step)
+2. Neighbor computation for non-square tilings (hexagons have 6 neighbors vs 4 for squares)
+3. Rendering complex polygon shapes (triangles, hexagons)
+
+**Optimization opportunities:**
+- Cache coordinate conversions
+- Precompute neighbor maps
+- Vectorize rendering operations
+
+#### When to Use MultiGrid
+
+**Recommended for:**
+- Research on topology invariance and spatial reasoning
+- Testing agent generalization across grid types
+- Exploring novel spatial representations
+- Prototyping new object types and mechanisms
+
+**Not suitable for:**
+- Production evaluation (use MiniGrid)
+- Large-scale benchmarking (too slow)
+- Tasks requiring all mechanisms (switches/gates incomplete)
+- Time-critical applications
+
+---
+
+### 1.3 Feature Comparison Matrix
+
+| Feature | MiniGrid | MultiGrid | Notes |
+|---------|----------|-----------|-------|
+| **Status** | Production | Experimental | MiniGrid is battle-tested |
+| **Maturity** | High | Low | MultiGrid needs more testing |
+| **Tilings** | Square only | Square/Hex/Triangle | MultiGrid's key innovation |
+| **Performance** | ~400ms/episode | ~600-900ms/episode | MiniGrid 1.5-2x faster |
+| **Mechanisms** | | | |
+| - Keys/Doors | ✓ | Partial | Door unlocking incomplete in MultiGrid |
+| - Switches/Gates | ✓ | ✗ | Not yet in MultiGrid |
+| - Pushable Blocks | ✓ | ✓ | Both support |
+| - Hazards (Lava) | ✓ | ✗ | Not yet in MultiGrid |
+| - Teleporters | ✗ | ✓ | MultiGrid native support |
+| - Zones | ✗ | ✓ | MultiGrid native support |
+| **Partial Obs** | ✓ | ✗ | MultiGrid planned v1.2 |
+| **Rendering Quality** | High | Variable | Hex/triangle rendering experimental |
+| **Community** | Large | Small | MiniGrid has 8+ years community |
+| **Documentation** | Extensive | Limited | MiniGrid has official docs |
+| **RL Library Support** | Full | Partial | MiniGrid works with SB3, RLlib |
+| **Use Case** | Standard eval | Topology research | Choose based on needs |
+
+---
+
+## 2. Why Non-Square Tilings Matter
+
+### 2.1 The Distribution Shift Hypothesis
+
+**Core Hypothesis**: Models trained predominantly on square-grid environments may develop spatial reasoning heuristics that only work on 4-connected grids. Success on square grids could reflect **interface memorization** rather than genuine spatial understanding.
+
+#### Evidence for Distribution Shift
+
+**1. Prevalence of Square Grids in Training Data**
+
+Modern vision-language-action models are trained on:
+- **Atari games**: All use square pixel grids with 4-directional movement
+- **GridWorld RL environments**: MiniGrid, DeepMind Lab, Procgen all use square grids
+- **Video games**: Vast majority use square tile maps (Minecraft, Pokémon, roguelikes)
+- **Robot navigation**: Indoor environments often represented as 2D occupancy grids (square cells)
+
+**2. Shortcut Learning Risk**
+
+Models may learn spurious correlations:
+- "Moving right twice is equivalent to moving forward twice if I'm facing east"
+- "Obstacles are always at Manhattan distance increments"
+- "The world has 4 degrees of rotational symmetry"
+
+These heuristics work perfectly on square grids but fail on hexagonal or triangular topologies.
+
+**3. Generalization Failure Example**
+
+Consider a simple navigation task: "Go from position A to position B while avoiding wall at position C."
+
+On a **square grid** (4 neighbors):
+```
+A . . B
+. W . .
+. . . .
+```
+Optimal path length: 3 steps (right, right, up or similar)
+
+On a **hexagonal grid** (6 neighbors):
+```
+  A   .   B
+    .   W   .
+  .   .   .
+```
+Optimal path length: 2 steps (northeast, east or similar)
+
+If a model memorizes "3 steps is optimal for this distance," it fails on the hex grid.
+
+### 2.2 Hexagonal Grids
+
+**Mathematical Properties:**
+- **6-connected**: Each cell has 6 neighbors
+- **Equidistant neighbors**: All neighbors are the same distance (unlike squares where diagonals are √2x farther)
+- **120° rotational symmetry**: Natural for systems with 3-fold or 6-fold symmetry
+- **Optimal packing**: Hexagons tile the plane with minimal perimeter for given area
+
+**Real-World Applications:**
+- **Strategy games**: Civilization, Catan, Axis & Allies
+- **Nature**: Honeycombs, crystal structures, turtle shells
+- **Geographic grids**: Some GIS systems use hexagonal cells for regional analysis
+- **Path planning**: Hexagonal grids provide smoother diagonal movement
+
+**What Hexagonal Grids Test:**
+
+1. **Direction Concept vs Pattern Matching**
+   - Square grid agent might memorize "turn_right = direction + 1 mod 4"
+   - Hex grid requires "turn_right = direction + 1 mod 6"
+   - Tests whether model understands angular rotation vs memorizes turning mechanics
+
+2. **Distance Computation**
+   - Square grids: Manhattan distance (|x1-x2| + |y1-y2|)
+   - Hex grids: Cube coordinate distance (different formula)
+   - Tests whether model understands proximity vs memorizes step counting
+
+3. **Adjacency Understanding**
+   - Square: 4 neighbors (N/E/S/W)
+   - Hex: 6 neighbors (N/NE/SE/S/SW/NW)
+   - Tests whether model understands "adjacent cell" as a concept vs memorizes 4-directional offsets
+
+**Example Task: Navigation with Obstacle**
+
+```python
+# Task specification (normalized coordinates)
+task = {
+    "agent_start": (0.2, 0.2),
+    "goal": (0.8, 0.8),
+    "walls": [(0.5, 0.5), (0.5, 0.6)]
+}
+
+# Square grid: Agent must go around (6-8 steps)
+# Hex grid: Agent can navigate more directly (4-5 steps)
+# Model must adapt strategy to topology
+```
+
+### 2.3 Triangular Grids
+
+**Mathematical Properties:**
+- **3-connected**: Each triangle has 3 edge-adjacent neighbors
+- **Variable connectivity**: 9-connected if considering vertex neighbors
+- **Minimal connectivity**: Forces longer paths and deeper planning
+- **Two orientations**: Upward-pointing (Δ) and downward-pointing (▽) triangles
+
+**What Triangular Grids Test:**
+
+1. **Planning Depth**
+   - Fewer neighbors per cell means longer paths
+   - Tests whether model can plan ahead multiple steps
+   - Exposes greedy policies that don't work with 3-way branching
+
+2. **Orientation Handling**
+   - Triangles have different adjacency depending on orientation (Δ vs ▽)
+   - Tests whether model can handle position-dependent navigation rules
+
+3. **Minimal Topology**
+   - Simplest non-trivial tiling (3 sides per cell)
+   - Cleanest test of "can model navigate non-square grids?"
+
+**Example Task: Forced Long Path**
+
+```python
+# Same start and goal as hex example
+# Triangle grid: ~7-9 steps (fewer branching options)
+# Model must commit to longer plans without greedy shortcuts
+```
+
+### 2.4 Archimedean Tilings (Future Work)
+
+**Archimedean tilings** use multiple regular polygons. Example: **3-4-6-4 tiling** (triangle-square-hexagon-square pattern).
+
+**Why This Is The Ultimate Test:**
+
+1. **Heterogeneous Neighborhoods**: Some cells have 3 neighbors, others 4, 6, or 8
+2. **No Global Patterns**: Model cannot memorize "every cell has N neighbors"
+3. **Position-Dependent Rules**: Navigation strategy must adapt per cell
+4. **Maximum Adversarial**: Most different from training distribution
+
+**Example: 4-8-8 Tiling**
+
+```
+┌─────┬─────┐
+│  □  │  ◯  │  □ = square (4 neighbors)
+├─────┼─────┤  ◯ = octagon (8 neighbors)
+│  ◯  │  □  │
+└─────┴─────┘
+```
+
+Model navigating this grid must:
+- Detect current cell type (square vs octagon)
+- Adjust movement strategy dynamically
+- Plan paths considering variable branching factor
+
+### 2.5 Contamination Resistance
+
+**Problem**: Modern VLMs are trained on massive web-scale datasets (LAION-5B, Common Crawl, etc.). If MiniGrid environment screenshots appear in training data, models may memorize task solutions rather than learn spatial reasoning.
+
+**Why Exotic Tilings Help:**
+
+1. **Rarity**: Hexagonal and triangular gridworld environments are uncommon in training data
+2. **Novel Visuals**: Rendering style differs from typical game screenshots
+3. **Controlled Distribution**: We generate tasks programmatically, ensuring no data leakage
+4. **Cleaner Signal**: Performance differences between square and hex grids isolate topology understanding
+
+**Evaluation Strategy:**
+
+```python
+# Compare same agent on same task across tilings
+results = {
+    "square": evaluate(agent, task, tiling="square"),
+    "hex": evaluate(agent, task, tiling="hex"),
+    "triangle": evaluate(agent, task, tiling="triangle")
+}
+
+# Generalization gap = performance drop on exotic tilings
+gap = results["square"]["success_rate"] - results["hex"]["success_rate"]
+
+# Ideal: gap ≈ 0 (topology-invariant reasoning)
+# Reality: gap > 0 (some overfitting to square grids)
+```
+
+---
+
+## 3. Architecture Decisions
+
+### 3.1 Why Adjacency Graphs Over Coordinate Grids
+
+**Traditional Approach (MiniGrid):**
+
+```python
+# Hardcoded coordinate arithmetic
+def move_forward(agent_pos, agent_dir):
+    if agent_dir == 0:  # East
+        return (agent_pos[0] + 1, agent_pos[1])
+    elif agent_dir == 1:  # South
+        return (agent_pos[0], agent_pos[1] + 1)
+    # ... hardcoded for 4 directions
+```
+
+**Problem**: Cannot generalize to 6-directional (hex) or variable-directional (triangle) grids.
+
+**MultiGrid Approach:**
+
+```python
+# Tiling-agnostic adjacency graph
+class Tiling(ABC):
+    def get_neighbors(self, cell_id: int) -> dict[str, int]:
+        """Return mapping of direction names to neighbor cell IDs."""
+        pass
+
+# Works for any tiling
+def move_forward(agent_cell, agent_dir, tiling):
+    neighbors = tiling.get_neighbors(agent_cell)
+    return neighbors[agent_dir]  # No hardcoded arithmetic!
+```
+
+**Advantages:**
+
+1. **Tiling Independence**: Same code works for square, hex, triangle, Penrose, Voronoi, etc.
+2. **Extensibility**: Add new tilings without modifying core logic
+3. **Correctness**: Neighbor relationships defined once per tiling, not scattered throughout codebase
+4. **Testing**: Each tiling has isolated test suite
+
+**Design Pattern: Strategy Pattern**
+
+```python
+# Abstract interface
+class Tiling(ABC):
+    @abstractmethod
+    def generate_grid(self, width, height) -> Graph: pass
+
+    @abstractmethod
+    def get_neighbors(self, cell_id) -> dict[str, int]: pass
+
+    @abstractmethod
+    def cell_to_canonical(self, cell_id) -> tuple[float, float]: pass
+
+# Concrete implementations
+class SquareTiling(Tiling): ...
+class HexTiling(Tiling): ...
+class TriangleTiling(Tiling): ...
+
+# Usage
+tiling = HexTiling()
+graph = tiling.generate_grid(8, 8)
+neighbors = tiling.get_neighbors(cell_id=42)
+```
+
+### 3.2 Why Gymnasium API Compatibility Matters
+
+**Gymnasium** (formerly OpenAI Gym) is the de facto standard for RL environments.
+
+**Standard Interface:**
+
+```python
+# All Gymnasium environments implement this
+env = gym.make("MiniGrid-DoorKey-8x8-v0")
+observation, info = env.reset(seed=42)
+
+done = False
+while not done:
+    action = agent.predict(observation)
+    observation, reward, terminated, truncated, info = env.step(action)
+    done = terminated or truncated
+```
+
+**Why This Matters:**
+
+1. **RL Library Integration**: Stable-Baselines3, RLlib, CleanRL all expect Gymnasium API
+2. **Benchmarking**: Papers can directly compare against Gymnasium baselines
+3. **Tooling**: Visualization tools, logging, and monitoring assume Gymnasium
+4. **Reproducibility**: Standard API reduces implementation variance between research groups
+
+**MultiGrid Compliance:**
+
+```python
+class MultiGridEnv(gym.Env):
+    """Fully Gymnasium-compatible environment."""
+
+    def reset(self, seed=None, options=None):
+        # Standard return: (observation, info)
+        return observation, info
+
+    def step(self, action):
+        # Standard return: (obs, reward, terminated, truncated, info)
+        return obs, reward, terminated, truncated, info
+```
+
+### 3.3 Why Canonical [0,1] Coordinates for Cross-Domain Transfer
+
+**Problem**: Different domains use different coordinate systems.
+
+**Examples:**
+
+| Domain | Coordinate System | Range |
+|--------|-------------------|-------|
+| GridWorld | Integer cell indices | [0, width) × [0, height) |
+| Physics (MuJoCo) | Continuous world space | (-∞, +∞) × (-∞, +∞) |
+| Natural Language | No spatial coordinates | N/A |
+| GUI (Pygame) | Pixel coordinates | [0, screen_width) × [0, screen_height) |
+
+**Solution: Normalized Canonical Coordinates**
+
+All positions are represented in [0,1] × [0,1] space:
+
+```python
+# Task specification (domain-agnostic)
+task = {
+    "agent_start": (0.2, 0.2),
+    "goal": (0.8, 0.8),
+    "obstacles": [(0.5, 0.5)]
+}
+
+# GridWorld adapter
+def to_grid(pos, grid_size):
+    return (int(pos[0] * grid_size[0]), int(pos[1] * grid_size[1]))
+
+# Physics adapter (MuJoCo)
+def to_physics(pos, world_bounds):
+    x = world_bounds[0] + pos[0] * (world_bounds[1] - world_bounds[0])
+    y = world_bounds[2] + pos[1] * (world_bounds[3] - world_bounds[2])
+    return (x, y)
+
+# GUI adapter (Pygame)
+def to_pixels(pos, screen_size):
+    return (int(pos[0] * screen_size[0]), int(pos[1] * screen_size[1]))
+```
+
+**Advantages:**
+
+1. **Domain Independence**: Same task definition works across all domains
+2. **Resolution Independence**: Tasks scale to different grid/screen sizes
+3. **Human Interpretability**: Normalized coordinates are intuitive (0.5 = center)
+4. **Transfer Learning**: Agents trained on gridworld can be tested on physics sim with same task
+
+**Precision Considerations:**
+
+```python
+# Potential precision loss with integer grids
+grid_pos = (3, 5) in 8×8 grid
+normalized = (0.375, 0.625)
+back_to_grid = (int(0.375 * 8), int(0.625 * 8)) = (3, 5) ✓
+
+# Loss with non-power-of-2 dimensions
+grid_pos = (3, 5) in 7×7 grid
+normalized = (0.428571, 0.714286)
+back_to_grid = (int(0.428571 * 7), int(0.714286 * 7)) = (2, 5) ✗
+```
+
+**Recommendation**: Use power-of-2 dimensions (8×8, 16×16) for lossless round-tripping.
+
+### 3.4 Action Space Design
+
+**MiniGrid Standard (7 Actions):**
+
+```python
+actions = {
+    0: "turn_left",    # Rotate counterclockwise
+    1: "turn_right",   # Rotate clockwise
+    2: "forward",      # Move in facing direction
+    3: "pickup",       # Pick up object in front
+    4: "drop",         # Drop held object
+    5: "toggle",       # Interact (open door, press switch)
+    6: "done"          # Signal completion (no-op)
+}
+```
+
+**MultiGrid Extension (9 Actions):**
+
+```python
+actions = {
+    0: "FORWARD",      # Move forward
+    1: "BACKWARD",     # Move backward (new!)
+    2: "TURN_LEFT",    # Rotate CCW
+    3: "TURN_RIGHT",   # Rotate CW
+    4: "PICKUP",       # Pick up object
+    5: "DROP",         # Drop object
+    6: "PUSH",         # Push object forward
+    7: "WAIT",         # No-op
+    8: "TELEPORT"      # Use teleporter (if on one)
+}
+```
+
+**Action Translation Layer:**
+
+```python
+# Backend automatically translates MiniGrid actions to MultiGrid
+minigrid_to_multigrid = {
+    0: 2,  # turn_left → TURN_LEFT
+    1: 3,  # turn_right → TURN_RIGHT
+    2: 0,  # forward → FORWARD
+    3: 4,  # pickup → PICKUP
+    4: 5,  # drop → DROP
+    5: 6,  # toggle → PUSH
+    6: 7   # done → WAIT
+}
+```
+
+**Why Translation Matters:**
+
+1. **Policy Reuse**: Same agent code works on both backends
+2. **Comparative Evaluation**: Test same policy on MiniGrid and MultiGrid
+3. **Backward Compatibility**: Existing MiniGrid agents work on exotic tilings
+
+**Design Tradeoff: Absolute vs Relative Actions**
+
+```python
+# Option A: Absolute actions (not used)
+actions = ["move_north", "move_east", "move_south", "move_west"]
+# Problem: Doesn't work on hex (6 directions) or triangle (variable)
+
+# Option B: Relative actions (chosen)
+actions = ["turn_left", "turn_right", "forward"]
+# Benefit: Works on any tiling (just adjust turn angle)
+```
+
+Relative actions generalize to arbitrary tilings because they're ego-centric.
+
+### 3.5 File-Based Task Interface
+
+**Design Decision**: Tasks are defined in JSON files, not Python code.
+
+**JSON Task Specification:**
+
+```json
+{
+    "task_id": "tier2_key_door_001",
+    "seed": 42,
+    "difficulty_tier": 2,
+    "max_steps": 100,
+    "maze": {
+        "dimensions": [8, 8],
+        "start": [1, 1],
+        "goal": [6, 6],
+        "walls": [[3, 3], [3, 4]]
+    },
+    "mechanisms": {
+        "keys": [{"id": "k1", "position": [2, 2], "color": "red"}],
+        "doors": [{"id": "d1", "position": [4, 4], "requires_key": "red"}]
+    },
+    "goal": {"type": "reach_position", "target": [6, 6]}
+}
+```
+
+**Advantages:**
+
+1. **Language Agnostic**: Can be used from any language (Python, Julia, Rust, etc.)
+2. **Version Control**: Git-friendly plain text format
+3. **Human Readable**: Non-programmers can create tasks
+4. **Programmatic Generation**: Easy to generate task suites with scripts
+5. **Validation**: JSON schema validation catches errors early
+
+**Python ABC for Backends:**
+
+```python
+class AbstractGridBackend(ABC):
+    @abstractmethod
+    def configure(self, task_spec: TaskSpecification): pass
+
+    @abstractmethod
+    def reset(self, seed: int) -> tuple[np.ndarray, GridState, dict]: pass
+
+    @abstractmethod
+    def step(self, action: int) -> tuple[...]: pass
+```
+
+This ensures all backends implement the same interface, regardless of internal implementation.
+
+---
+
+## 4. Cross-Domain Vision (Forward-Looking)
+
+### 4.1 The Four Domains
+
+**Goal**: Same task definition works across four different embodiments.
+
+**Domain 1: GridWorld** (Current Implementation)
+- Square/hex/triangle tilings
+- Discrete cell-based navigation
+- Turn-based action execution
+- 2D top-down view
+
+**Domain 2: Physics Simulation** (Planned v1.2)
+- MuJoCo or PyBullet physics engine
+- Continuous 2D or 3D space
+- Continuous control (velocity, force)
+- Physical collisions and dynamics
+
+**Domain 3: Natural Language** (Planned v1.3)
+- Text-based interactive fiction
+- Parser-based commands ("go north", "take key")
+- ASCII or text descriptions
+- Pure language reasoning
+
+**Domain 4: GUI (Pygame)** (Planned v1.4)
+- Visual game interface
+- Mouse click and keyboard controls
+- Real-time or turn-based
+- Rich graphics and animations
+
+### 4.2 Canonical Task Specification as Shared Representation
+
+**Core Idea**: A single JSON task specification gets translated to each domain.
+
+**Example Task: Key-Door Puzzle**
+
+```json
+{
+    "task_id": "cross_domain_001",
+    "agent_start": [0.2, 0.2],
+    "goal": [0.8, 0.8],
+    "objects": [
+        {"type": "key", "id": "k1", "position": [0.3, 0.4], "color": "red"},
+        {"type": "door", "id": "d1", "position": [0.5, 0.5], "color": "red"}
+    ]
+}
+```
+
+**Domain Translations:**
+
+**GridWorld:**
+```python
+# 8×8 grid
+agent_start = (1, 1)  # 0.2 * 8 = 1.6 → 1
+goal = (6, 6)         # 0.8 * 8 = 6.4 → 6
+key_pos = (2, 3)
+door_pos = (4, 4)
+```
+
+**Physics (MuJoCo):**
+```python
+# 10m × 10m world
+agent_start = (2.0, 2.0)  # 0.2 * 10
+goal = (8.0, 8.0)
+key = PhysicsBody(position=(3.0, 4.0), shape="cube", color="red")
+door = PhysicsWall(position=(5.0, 5.0), color="red", passable=False)
+```
+
+**Natural Language:**
+```
+You are in a small room. To the NORTH, you see a RED KEY.
+To the EAST, there is a RED DOOR (locked). The goal is to the NORTHEAST.
+
+> take key
+You pick up the red key.
+
+> go east
+The door is locked. You need a red key.
+
+> unlock door
+You unlock the door with the red key. The door opens.
+
+> go east
+You reach the goal!
+```
+
+**GUI (Pygame):**
+```python
+# 800×800 pixel window
+agent_sprite = Sprite(position=(160, 160))
+goal_sprite = Sprite(position=(640, 640), texture="goal.png")
+key_sprite = Sprite(position=(240, 320), texture="key_red.png")
+door_sprite = Sprite(position=(400, 400), texture="door_red_locked.png")
+
+# Mouse click to move, click key to pick up, click door to unlock
+```
+
+### 4.3 Domain Adapters as Thin Translation Layers
+
+**Architecture:**
+
+```python
+# Core task specification (domain-agnostic)
+task_spec = TaskSpecification.from_json("task.json")
+
+# Domain adapters
+gridworld_env = GridWorldAdapter(task_spec, tiling="square")
+physics_env = PhysicsAdapter(task_spec, engine="mujoco")
+text_env = TextAdapter(task_spec, style="interactive_fiction")
+gui_env = GUIAdapter(task_spec, graphics="pygame")
+
+# Same evaluation code
+for env in [gridworld_env, physics_env, text_env, gui_env]:
+    obs, state, _ = env.reset(seed=42)
+    done = False
+    while not done:
+        action = agent.predict(obs)
+        obs, reward, terminated, truncated, state, _ = env.step(action)
+        done = terminated or truncated
+    print(f"Domain: {env.domain_name}, Success: {state.goal_reached}")
+```
+
+**Key Insight**: Adapters are thin. Most logic lives in the canonical task specification and shared utility functions.
+
+### 4.4 Mouse Click Support for Domain 4
+
+**Challenge**: GUI domain uses mouse clicks, not discrete actions.
+
+**Solution: Coordinate-Based Action Interface**
+
+```python
+# Standard discrete actions (Domains 1-3)
+action = 2  # forward
+
+# Coordinate-based actions (Domain 4)
+action = {"type": "click", "position": (0.6, 0.5)}
+```
+
+**Backend Handling:**
+
+```python
+class GUIAdapter(AbstractGridBackend):
+    def step(self, action):
+        if isinstance(action, int):
+            # Discrete action (keyboard shortcut)
+            return self._execute_discrete_action(action)
+        elif isinstance(action, dict) and action["type"] == "click":
+            # Mouse click action
+            pixel_pos = self._normalized_to_pixels(action["position"])
+            pygame_event = pygame.event.Event(MOUSEBUTTONDOWN, {"pos": pixel_pos})
+            return self._inject_event(pygame_event)
+```
+
+**Unified Agent Interface:**
+
+```python
+# Agent can use either action type
+class Agent:
+    def predict(self, obs, domain):
+        if domain.supports_discrete_actions:
+            return self.policy_discrete(obs)
+        else:
+            # VLM identifies clickable objects in image
+            clickable_objects = self.vlm.detect_objects(obs)
+            target = self.policy_select_object(clickable_objects)
+            return {"type": "click", "position": target.normalized_position}
+```
+
+**Example: Clicking a Key to Pick It Up**
+
+```python
+# Domain 1 (GridWorld): Discrete action
+action = 3  # pickup
+
+# Domain 4 (GUI): Click on key sprite
+key_position_pixels = (240, 320)
+key_position_normalized = (240/800, 320/800) = (0.3, 0.4)
+action = {"type": "click", "position": (0.3, 0.4)}
+```
+
+### 4.5 Cross-Domain Evaluation Strategy
+
+**Research Question**: Do agents learn task-solving strategies or domain-specific interfaces?
+
+**Evaluation Protocol:**
+
+1. **Train** on Domain 1 (GridWorld) with square tiling
+2. **Test** on:
+   - Domain 1 with hex tiling (topology shift)
+   - Domain 2 with physics (embodiment shift)
+   - Domain 3 with text (modality shift)
+   - Domain 4 with GUI (interface shift)
+
+**Metrics:**
+
+```python
+results = {
+    "gridworld_square": {"success_rate": 0.85, "avg_steps": 12},
+    "gridworld_hex": {"success_rate": 0.60, "avg_steps": 15},
+    "physics": {"success_rate": 0.45, "avg_steps": 25},
+    "text": {"success_rate": 0.30, "avg_steps": 18},
+    "gui": {"success_rate": 0.55, "avg_steps": 20}
+}
+
+# Generalization gaps
+topology_gap = results["gridworld_square"]["success_rate"] - results["gridworld_hex"]["success_rate"]
+embodiment_gap = results["gridworld_square"]["success_rate"] - results["physics"]["success_rate"]
+modality_gap = results["gridworld_square"]["success_rate"] - results["text"]["success_rate"]
+interface_gap = results["gridworld_square"]["success_rate"] - results["gui"]["success_rate"]
+```
+
+**Hypothesis**: Current VLMs will show large generalization gaps, indicating domain overfitting.
+
+---
+
+## 5. Evaluation Methodology
+
+### 5.1 Deterministic Seeds for Reproducibility
+
+**Requirement**: All random operations must use explicit seeds.
+
+**Implementation:**
+
+```python
+# Task specification includes seed
+task_spec = {
+    "task_id": "eval_001",
+    "seed": 42,  # Default seed for this task
+    ...
+}
+
+# Evaluation can override seed
+for seed in range(10):
+    obs, state, _ = backend.reset(seed=seed)
+    # Run episode with this seed
+```
+
+**Why This Matters:**
+
+1. **Reproducibility**: Other researchers can replicate exact results
+2. **Debugging**: Failed episodes can be replayed with same seed
+3. **Fair Comparison**: All models see identical task instances
+4. **Statistical Power**: Multiple seeds enable significance testing
+
+**Seeding Strategy:**
+
+```python
+# Seed controls:
+- Environment randomness (object placement if stochastic)
+- Agent policy randomness (if stochastic policy)
+- Evaluation noise (if added)
+
+# Example
+np.random.seed(seed)
+torch.manual_seed(seed)
+env.reset(seed=seed)
+agent.reset_rng(seed)
+```
+
+### 5.2 Metrics
+
+**Primary Metrics:**
+
+**1. Success Rate**
+```python
+success_rate = num_episodes_reached_goal / total_episodes
+```
+
+Binary: Did the agent reach the goal within max_steps?
+
+**2. Step Efficiency**
+```python
+step_efficiency = goal_distance / steps_taken
+```
+
+How efficiently did the agent solve the task? Lower is better.
+
+**3. Reward (for RL agents)**
+```python
+total_reward = sum(rewards_per_step)
+```
+
+MiniGrid uses time-penalized reward: `reward = 1.0 - 0.9 * (steps / max_steps)`
+
+**Secondary Metrics:**
+
+**4. Mechanism Usage**
+- Keys collected: `len(state.collected_keys)`
+- Switches activated: `len(state.active_switches)`
+- Doors unlocked: `len(state.open_doors)`
+
+**5. Path Quality**
+- Path length vs optimal path
+- Backtracking steps (revisited cells)
+
+**6. Cross-Domain Generalization Gap**
+```python
+gap = success_rate_domain_A - success_rate_domain_B
+```
+
+### 5.3 Difficulty Tiers
+
+Tasks are organized into 5 tiers based on complexity.
+
+**Tier 1: Pure Navigation**
+
+**What It Tests**: Basic pathfinding, no mechanisms
+
+**Example Task:**
+```json
+{
+    "difficulty_tier": 1,
+    "maze": {
+        "dimensions": [8, 8],
+        "start": [1, 1],
+        "goal": [6, 6],
+        "walls": []  # Empty maze or simple obstacles
+    },
+    "mechanisms": {}  # No keys, doors, etc.
+}
+```
+
+**Skills Required:**
+- Spatial awareness (where am I?)
+- Goal-directed navigation (move toward goal)
+- Obstacle avoidance (go around walls)
+
+**Evaluation:**
+- Should have >90% success rate for competent agents
+- Baseline for all other tiers
+
+---
+
+**Tier 2: Linear Dependencies**
+
+**What It Tests**: Sequential subtasks (A → B → C)
+
+**Example Task: Key-Door Puzzle**
+```json
+{
+    "difficulty_tier": 2,
+    "mechanisms": {
+        "keys": [{"id": "k1", "position": [2, 2], "color": "red"}],
+        "doors": [{"id": "d1", "position": [4, 4], "requires_key": "red"}]
+    }
+}
+```
+
+**Dependency Chain:**
+1. Navigate to key
+2. Pick up key
+3. Navigate to door
+4. Unlock door
+5. Navigate to goal
+
+**Skills Required:**
+- Subtask decomposition
+- Memory (remember where door is after picking up key)
+- Action sequencing (pickup, then unlock)
+
+**Common Failure Modes:**
+- Forgetting to pick up key
+- Trying to unlock door without key
+- Navigating to goal before unlocking door
+
+---
+
+**Tier 3: Multi-Mechanism**
+
+**What It Tests**: Parallel dependencies, multiple paths
+
+**Example Task: Multiple Keys and Switches**
+```json
+{
+    "difficulty_tier": 3,
+    "mechanisms": {
+        "keys": [
+            {"id": "k1", "position": [2, 2], "color": "red"},
+            {"id": "k2", "position": [5, 1], "color": "blue"}
+        ],
+        "doors": [
+            {"id": "d1", "position": [3, 3], "requires_key": "red"},
+            {"id": "d2", "position": [6, 3], "requires_key": "blue"}
+        ],
+        "switches": [{"id": "sw1", "position": [4, 5], "controls": ["gate1"]}],
+        "gates": [{"id": "gate1", "position": [5, 6]}]
+    }
+}
+```
+
+**Skills Required:**
+- Planning with multiple subgoals
+- Optimal ordering (which key first?)
+- Resource management (can only carry one key at a time in some variants)
+
+**Common Failure Modes:**
+- Suboptimal ordering (collect far key first)
+- Forgetting about mechanisms (activate switch but forget to use gate)
+
+---
+
+**Tier 4: Irreversibility**
+
+**What It Tests**: One-way actions, commitment
+
+**Example Task: Pushable Blocks**
+```json
+{
+    "difficulty_tier": 4,
+    "mechanisms": {
+        "blocks": [
+            {"id": "b1", "position": [3, 3], "color": "grey"},
+            {"id": "b2", "position": [4, 5], "color": "grey"}
+        ]
+    },
+    "rules": {
+        "blocks_pushable": true,
+        "blocks_reversible": false  # Can't pull, only push
+    }
+}
+```
+
+**Irreversible Actions:**
+- Pushing blocks (can't unpush)
+- Consumable keys (key disappears after use)
+- One-shot switches (can only activate once)
+
+**Skills Required:**
+- Lookahead planning (will this push block me in?)
+- Backtracking avoidance
+- Commitment to plans
+
+**Common Failure Modes:**
+- Pushing block into corner (deadlock)
+- Consuming key prematurely
+- Activating one-shot switch before positioning
+
+---
+
+**Tier 5: Hidden Information**
+
+**What It Tests**: Memory, exploration, inference
+
+**Example Task: Hidden Switch**
+```json
+{
+    "difficulty_tier": 5,
+    "mechanisms": {
+        "switches": [
+            {"id": "sw1", "position": [2, 3], "visibility": "hidden"}
+        ],
+        "gates": [
+            {"id": "gate1", "position": [5, 5]}
+        ]
+    },
+    "rules": {
+        "partial_observability": true
+    }
+}
+```
+
+**Hidden Information:**
+- Hidden switches (invisible until discovered)
+- Partial observability (limited vision radius)
+- Teleporters (destination unknown until used)
+- Color inference (must deduce which key opens which door)
+
+**Skills Required:**
+- Exploration (systematic search for hidden objects)
+- Memory (remember locations outside current view)
+- Inference (deduce rules from observations)
+
+**Common Failure Modes:**
+- Incomplete exploration (miss hidden switch)
+- Forgetting locations (walk past goal because it's out of view)
+- Incorrect inference (wrong key-door pairing)
+
+### 5.4 Live Benchmark Strategy
+
+**Problem**: Fixed benchmarks can be memorized by models trained on leaked data.
+
+**Solution: Procedural Generation + Difficulty Estimation**
+
+**Procedural Generation:**
+
+```python
+def generate_task(difficulty_tier, seed):
+    """Generate a random task at specified difficulty."""
+    rng = np.random.RandomState(seed)
+
+    # Generate maze
+    grid_size = 8 + difficulty_tier * 2  # Harder = bigger
+    walls = generate_maze(grid_size, density=0.1 + difficulty_tier * 0.05, rng=rng)
+
+    # Add mechanisms based on tier
+    if difficulty_tier >= 2:
+        num_keys = rng.randint(1, difficulty_tier)
+        keys = place_keys(grid_size, num_keys, walls, rng)
+        doors = place_doors_for_keys(keys, walls, rng)
+
+    if difficulty_tier >= 3:
+        num_switches = rng.randint(1, difficulty_tier - 1)
+        switches = place_switches(grid_size, num_switches, walls, rng)
+        gates = place_gates_for_switches(switches, walls, rng)
+
+    # ... etc
+
+    return TaskSpecification(...)
+```
+
+**Difficulty Estimation:**
+
+After generating a task, estimate its difficulty:
+
+```python
+def estimate_difficulty(task_spec):
+    """Estimate task difficulty using heuristics."""
+
+    # Heuristics
+    optimal_path_length = a_star(task_spec.start, task_spec.goal, task_spec.walls)
+    num_mechanisms = count_mechanisms(task_spec)
+    dependency_depth = compute_dependency_graph_depth(task_spec)
+
+    # Weighted score
+    difficulty_score = (
+        0.3 * optimal_path_length +
+        0.4 * num_mechanisms +
+        0.3 * dependency_depth
+    )
+
+    # Verify with expert policy
+    expert_success, expert_steps = run_expert(task_spec)
+    if not expert_success:
+        return "too_hard"  # Discard unsolvable tasks
+
+    if expert_steps < 10:
+        return "too_easy"  # Discard trivial tasks
+
+    return difficulty_score
+```
+
+**Evaluation Protocol:**
+
+```python
+# Generate 1000 tasks at each tier
+for tier in range(1, 6):
+    tasks = []
+    seed = tier * 10000
+
+    while len(tasks) < 1000:
+        task = generate_task(tier, seed)
+        difficulty = estimate_difficulty(task)
+
+        # Only keep tasks in appropriate difficulty range
+        tier_ranges = {1: (1, 5), 2: (5, 15), 3: (15, 30), 4: (30, 50), 5: (50, 100)}
+        min_diff, max_diff = tier_ranges[tier]
+
+        if min_diff <= difficulty <= max_diff:
+            tasks.append(task)
+
+        seed += 1
+
+    # Evaluate agent
+    results = evaluate_agent(agent, tasks)
+    print(f"Tier {tier}: Success rate = {results['success_rate']:.2%}")
+```
+
+**Advantages:**
+
+1. **Contamination Resistance**: No fixed dataset to memorize
+2. **Infinite Evaluation**: Generate fresh tasks for each evaluation
+3. **Difficulty Control**: Ensure tasks span appropriate difficulty range
+4. **Fair Comparison**: All models see same difficulty distribution
+
+**Validation:**
+
+- Run expert policy (A*) to verify solvability
+- Run human players to validate difficulty tiers
+- Compare multiple agents to establish baseline difficulty curves
+
+---
+
+## Appendix: Design Alternatives Considered
+
+### A.1 Why Not Use Unity or Unreal for Domain 4?
+
+**Considered**: Use full game engine for GUI domain
+
+**Rejected Because:**
+- Heavyweight: Unity/Unreal are multi-GB installs
+- Complexity: Steep learning curve for contributors
+- Licensing: Unity has runtime fee for certain use cases
+- Overkill: Our GUI needs are simple (2D, turn-based)
+
+**Chosen**: Pygame (lightweight, Python-native, MIT license)
+
+### A.2 Why Not Use SMARTS or Habitat for Domain 2?
+
+**Considered**: Use existing robotics simulators
+
+**Rejected Because:**
+- Overconstrained: These have specific robot embodiments
+- Complex: Hard to match canonical task specifications
+- Performance: Slower than MuJoCo for simple 2D tasks
+
+**Chosen**: MuJoCo (faster, more flexible, better documented)
+
+### A.3 Why Not Use Existing Text Adventure Engines (Z-Machine, Inform)?
+
+**Considered**: Use Infocom-style text adventure engines
+
+**Rejected Because:**
+- Parser complexity: Natural language parsing is a separate research problem
+- Compatibility: Hard to map canonical tasks to text adventure format
+- Evaluation: Unclear how to measure spatial reasoning in pure text
+
+**Chosen**: Custom text adapter with simple command set ("go north", "take key")
+
+---
+
+## Document Changelog
+
+### Version 1.0 (2026-02-06)
+- Initial technical design document
+- Covers technology stack, architecture, cross-domain vision, evaluation methodology
+- Written for MultiNet v1.1 release
+
+---
+
+## References
+
+**MiniGrid:**
+- Farama Foundation: https://minigrid.farama.org/
+- GitHub: https://github.com/Farama-Foundation/Minigrid
+- Paper: Chevalier-Boisvert et al. (2018), "Minimalistic Gridworld Environment for OpenAI Gym"
+
+**Hexagonal Grids:**
+- Red Blob Games Tutorial: https://www.redblobgames.com/grids/hexagons/
+- Birchfield & Tomasi (1998), "Depth Discontinuities by Pixel-to-Pixel Stereo"
+
+**Archimedean Tilings:**
+- Grünbaum & Shephard (1987), "Tilings and Patterns"
+- Wikipedia: https://en.wikipedia.org/wiki/Euclidean_tilings_by_convex_regular_polygons
+
+**Gymnasium API:**
+- Documentation: https://gymnasium.farama.org/
+- GitHub: https://github.com/Farama-Foundation/Gymnasium
+
+**MuJoCo:**
+- Documentation: https://mujoco.readthedocs.io/
+- Paper: Todorov et al. (2012), "MuJoCo: A physics engine for model-based control"
+
+---
+
+**End of Technical Design Document**
diff --git a/src/v1_1/environment_comparison.png b/src/v1_1/environment_comparison.png
new file mode 100644
index 00000000..b6ef108b
Binary files /dev/null and b/src/v1_1/environment_comparison.png differ
diff --git a/src/v1_1/evaluation_harness.py b/src/v1_1/evaluation_harness.py
new file mode 100644
index 00000000..79734858
--- /dev/null
+++ b/src/v1_1/evaluation_harness.py
@@ -0,0 +1,256 @@
+"""
+Evaluation Harness for MultiNet v1.1
+
+Wraps GridRunner + ModelInterface to evaluate models on MiniGrid tasks.
+Handles conversion between GridRunner's callback interface and ModelInterface.
+"""
+
+from __future__ import annotations
+
+import json
+import numpy as np
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+try:
+    from .model_interface import ModelInterface, ModelInput, ModelOutput
+    from .gridworld.runner.grid_runner import GridRunner, EpisodeResult
+    from .gridworld.backends.base import AbstractGridBackend, GridState
+    from .gridworld.backends.minigrid_backend import MiniGridBackend
+    from .gridworld.task_spec import TaskSpecification
+    from .gridworld.actions import ACTION_NAMES, ACTION_DESCRIPTIONS
+except ImportError:
+    from model_interface import ModelInterface, ModelInput, ModelOutput
+    from gridworld.runner.grid_runner import GridRunner, EpisodeResult
+    from gridworld.backends.base import AbstractGridBackend, GridState
+    from gridworld.backends.minigrid_backend import MiniGridBackend
+    from gridworld.task_spec import TaskSpecification
+    from gridworld.actions import ACTION_NAMES, ACTION_DESCRIPTIONS
+
+
+@dataclass
+class TierMetrics:
+    """Aggregate metrics for a tier of tasks."""
+    tier: int
+    num_tasks: int
+    num_success: int
+    success_rate: float
+    avg_steps: float
+    avg_reward: float
+    results: list[EpisodeResult] = field(default_factory=list, repr=False)
+
+    def to_dict(self) -> dict:
+        return {
+            "tier": self.tier,
+            "num_tasks": self.num_tasks,
+            "num_success": self.num_success,
+            "success_rate": self.success_rate,
+            "avg_steps": self.avg_steps,
+            "avg_reward": self.avg_reward,
+        }
+
+
+@dataclass
+class EvaluationResult:
+    """Complete evaluation result across all tiers."""
+    model_name: str
+    tier_metrics: dict[int, TierMetrics]
+    overall_success_rate: float
+    overall_avg_steps: float
+    overall_avg_reward: float
+
+    def to_dict(self) -> dict:
+        return {
+            "model_name": self.model_name,
+            "tier_metrics": {k: v.to_dict() for k, v in self.tier_metrics.items()},
+            "overall_success_rate": self.overall_success_rate,
+            "overall_avg_steps": self.overall_avg_steps,
+            "overall_avg_reward": self.overall_avg_reward,
+        }
+
+    def save(self, path: str) -> None:
+        with open(path, "w") as f:
+            json.dump(self.to_dict(), f, indent=2)
+
+
+class EvaluationHarness:
+    """
+    Evaluation harness that bridges ModelInterface with GridRunner.
+
+    Usage:
+        harness = EvaluationHarness(model)
+        result = harness.evaluate_task(task_spec, seed=42)
+        tier_result = harness.evaluate_tier(tier=1, task_dir="gridworld/tasks")
+        full_result = harness.evaluate_all(task_dir="gridworld/tasks")
+    """
+
+    def __init__(
+        self,
+        model: ModelInterface,
+        backend: Optional[AbstractGridBackend] = None,
+        render_mode: str = "rgb_array",
+    ):
+        self.model = model
+        self.runner = GridRunner(
+            backend=backend or MiniGridBackend(render_mode=render_mode),
+            render_mode=render_mode,
+        )
+
+    def _make_policy_fn(self):
+        """Create a policy function bridging GridRunner to ModelInterface."""
+        step_counter = [0]
+
+        def policy_fn(obs: np.ndarray, state: GridState, mission: str) -> int:
+            step_counter[0] += 1
+            model_input = ModelInput(
+                image=obs if isinstance(obs, np.ndarray) and obs.ndim == 3 else
+                      obs["image"] if isinstance(obs, dict) and "image" in obs else
+                      np.zeros((64, 64, 3), dtype=np.uint8),
+                text_prompt=mission,
+                action_space=ACTION_NAMES,
+                step_number=step_counter[0],
+                max_steps=state.max_steps,
+            )
+            output = self.model.predict(model_input)
+            return output.action
+
+        return policy_fn
+
+    def evaluate_task(
+        self,
+        task_spec: TaskSpecification,
+        seed: Optional[int] = None,
+        verbose: bool = False,
+    ) -> EpisodeResult:
+        """
+        Evaluate the model on a single task.
+
+        Args:
+            task_spec: Task to evaluate
+            seed: Random seed override
+            verbose: Print step-by-step info
+
+        Returns:
+            EpisodeResult with trajectory and metrics
+        """
+        policy_fn = self._make_policy_fn()
+        return self.runner.run_episode(
+            task_spec=task_spec,
+            policy_fn=policy_fn,
+            seed=seed,
+            verbose=verbose,
+        )
+
+    def evaluate_tier(
+        self,
+        tier: int,
+        task_dir: str = "gridworld/tasks",
+        verbose: bool = False,
+    ) -> TierMetrics:
+        """
+        Evaluate the model on all tasks in a tier.
+
+        Args:
+            tier: Difficulty tier (1-5)
+            task_dir: Base directory containing tier subdirectories
+            verbose: Print progress
+
+        Returns:
+            TierMetrics with aggregate results
+        """
+        tier_path = Path(task_dir) / f"tier{tier}"
+        if not tier_path.exists():
+            raise FileNotFoundError(f"Tier directory not found: {tier_path}")
+
+        task_files = sorted(tier_path.glob("*.json"))
+        if not task_files:
+            raise FileNotFoundError(f"No task files found in {tier_path}")
+
+        results = []
+        for task_file in task_files:
+            spec = TaskSpecification.from_json(str(task_file))
+            if verbose:
+                print(f"  Evaluating {spec.task_id}...")
+            result = self.evaluate_task(spec, verbose=verbose)
+            results.append(result)
+
+        return self._compute_tier_metrics(tier, results)
+
+    def evaluate_all(
+        self,
+        task_dir: str = "gridworld/tasks",
+        tiers: Optional[list[int]] = None,
+        verbose: bool = False,
+    ) -> EvaluationResult:
+        """
+        Evaluate the model on all tiers.
+
+        Args:
+            task_dir: Base directory containing tier subdirectories
+            tiers: List of tiers to evaluate (default: 1-5)
+            verbose: Print progress
+
+        Returns:
+            EvaluationResult with per-tier and overall metrics
+        """
+        if tiers is None:
+            tiers = [1, 2, 3, 4, 5]
+
+        tier_metrics = {}
+        all_results = []
+
+        for tier in tiers:
+            tier_path = Path(task_dir) / f"tier{tier}"
+            if not tier_path.exists():
+                if verbose:
+                    print(f"Skipping tier {tier} (directory not found)")
+                continue
+
+            if verbose:
+                print(f"\n=== Tier {tier} ===")
+
+            metrics = self.evaluate_tier(tier, task_dir, verbose=verbose)
+            tier_metrics[tier] = metrics
+            all_results.extend(metrics.results)
+
+        # Compute overall metrics
+        if all_results:
+            overall_success = sum(1 for r in all_results if r.success) / len(all_results)
+            overall_steps = sum(r.steps_taken for r in all_results) / len(all_results)
+            overall_reward = sum(r.total_reward for r in all_results) / len(all_results)
+        else:
+            overall_success = 0.0
+            overall_steps = 0.0
+            overall_reward = 0.0
+
+        return EvaluationResult(
+            model_name=self.model.model_name,
+            tier_metrics=tier_metrics,
+            overall_success_rate=overall_success,
+            overall_avg_steps=overall_steps,
+            overall_avg_reward=overall_reward,
+        )
+
+    def _compute_tier_metrics(self, tier: int, results: list[EpisodeResult]) -> TierMetrics:
+        """Compute aggregate metrics for a set of episode results."""
+        num_tasks = len(results)
+        num_success = sum(1 for r in results if r.success)
+        success_rate = num_success / num_tasks if num_tasks > 0 else 0.0
+        avg_steps = sum(r.steps_taken for r in results) / num_tasks if num_tasks > 0 else 0.0
+        avg_reward = sum(r.total_reward for r in results) / num_tasks if num_tasks > 0 else 0.0
+
+        return TierMetrics(
+            tier=tier,
+            num_tasks=num_tasks,
+            num_success=num_success,
+            success_rate=success_rate,
+            avg_steps=avg_steps,
+            avg_reward=avg_reward,
+            results=results,
+        )
+
+    def close(self):
+        """Clean up resources."""
+        self.model.teardown()
+        self.runner.close()
diff --git a/src/v1_1/example_usage.py b/src/v1_1/example_usage.py
new file mode 100644
index 00000000..b2bbc84c
--- /dev/null
+++ b/src/v1_1/example_usage.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+"""
+Example usage of the MultiGrid environment.
+
+This script demonstrates the basic functionality of the MultiGrid system.
+"""
+
+import sys
+import os
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+from multigrid.env import MultiGridEnv
+from multigrid.agent import Action
+
+
+def basic_example():
+    """Basic example: Create environment and execute actions."""
+    print("=" * 60)
+    print("BASIC EXAMPLE: Square Grid Navigation")
+    print("=" * 60)
+
+    # Create a simple task
+    task_spec = {
+        "task_id": "example_001",
+        "seed": 42,
+        "scene": {
+            "bounds": {"width": 1.0, "height": 1.0},
+            "objects": [
+                {
+                    "id": "cube_red",
+                    "type": "movable",
+                    "color": "red",
+                    "position": {"x": 0.7, "y": 0.7},
+                    "size": 0.1
+                }
+            ],
+            "agent": {
+                "position": {"x": 0.2, "y": 0.2},
+                "facing": 0  # Facing north
+            }
+        },
+        "goal": {
+            "predicate": "object_in_zone",
+            "object_id": "cube_red",
+            "zone_id": "zone_blue"
+        },
+        "limits": {"max_steps": 100},
+        "tiling": {"type": "square", "grid_size": {"width": 10, "height": 10}}
+    }
+
+    # Create environment
+    env = MultiGridEnv(task_spec, tiling="square")
+    obs, info = env.reset(seed=42)
+
+    print(f"\nInitial state:")
+    state = env.get_state_dict()
+    print(f"  Agent position: {state['agent']['cell_id']}")
+    print(f"  Agent facing: {state['agent']['facing_direction']}")
+    print(f"  Agent holding: {state['agent']['holding']}")
+
+    # Execute some actions
+    actions = [
+        (Action.FORWARD, "Move forward"),
+        (Action.TURN_RIGHT, "Turn right"),
+        (Action.FORWARD, "Move forward"),
+        (Action.FORWARD, "Move forward"),
+    ]
+
+    print(f"\nExecuting {len(actions)} actions:")
+    for action, description in actions:
+        obs, reward, terminated, truncated, info = env.step(action)
+        state = env.get_state_dict()
+
+        print(f"\n  Action: {description}")
+        print(f"    New position: {state['agent']['cell_id']}")
+        print(f"    Facing: {state['agent']['facing_direction']}")
+        print(f"    Reward: {reward:.2f}")
+        if info.get('invalid_action'):
+            print(f"    ⚠️  Invalid action!")
+
+
+def multi_tiling_example():
+    """Demonstrate the same task on different tilings."""
+    print("\n" + "=" * 60)
+    print("MULTI-TILING EXAMPLE: Same Task, Different Grids")
+    print("=" * 60)
+
+    task_spec = {
+        "task_id": "example_002",
+        "seed": 42,
+        "scene": {
+            "bounds": {"width": 1.0, "height": 1.0},
+            "objects": [],
+            "agent": {
+                "position": {"x": 0.5, "y": 0.5},
+                "facing": 0
+            }
+        },
+        "goal": {},
+        "limits": {"max_steps": 100},
+        "tiling": {"type": "square", "grid_size": {"width": 10, "height": 10}}
+    }
+
+    for tiling_name in ["square", "hex", "triangle"]:
+        print(f"\n{tiling_name.upper()} TILING:")
+
+        env = MultiGridEnv(task_spec, tiling=tiling_name)
+        obs, info = env.reset()
+
+        tiling = env.tiling
+        print(f"  Directions: {tiling.directions}")
+        print(f"  Direction count: {len(tiling.directions)}")
+        print(f"  Total cells: {len(tiling.cells)}")
+
+        # Check a cell's neighbors
+        first_cell_id = list(tiling.cells.keys())[50]  # Pick a middle cell
+        cell = tiling.cells[first_cell_id]
+        print(f"  Sample cell {first_cell_id} has {len(cell.neighbors)} neighbors")
+
+
+def object_interaction_example():
+    """Demonstrate object interaction (pickup, drop, push)."""
+    print("\n" + "=" * 60)
+    print("OBJECT INTERACTION EXAMPLE")
+    print("=" * 60)
+
+    task_spec = {
+        "task_id": "example_003",
+        "seed": 42,
+        "scene": {
+            "bounds": {"width": 1.0, "height": 1.0},
+            "objects": [
+                {
+                    "id": "cube_red",
+                    "type": "movable",
+                    "color": "red",
+                    "position": {"x": 0.4, "y": 0.2},
+                    "size": 0.1
+                }
+            ],
+            "agent": {
+                "position": {"x": 0.2, "y": 0.2},
+                "facing": 1  # Facing east
+            }
+        },
+        "goal": {},
+        "limits": {"max_steps": 100},
+        "tiling": {"type": "square", "grid_size": {"width": 10, "height": 10}}
+    }
+
+    env = MultiGridEnv(task_spec, tiling="square")
+    obs, info = env.reset()
+
+    print(f"\nInitial state:")
+    state = env.get_state_dict()
+    print(f"  Agent: {state['agent']['cell_id']} (facing {state['agent']['facing_direction']})")
+    print(f"  Red cube: {state['objects']['cube_red']['cell_id']}")
+    print(f"  Holding: {state['agent']['holding']}")
+
+    # Move to object and pick it up
+    print(f"\n1. Moving forward to object...")
+    obs, reward, _, _, info = env.step(Action.FORWARD)
+    state = env.get_state_dict()
+    print(f"  Agent: {state['agent']['cell_id']}")
+
+    print(f"\n2. Picking up object...")
+    obs, reward, _, _, info = env.step(Action.PICKUP)
+    state = env.get_state_dict()
+    print(f"  Holding: {state['agent']['holding']}")
+    if state['agent']['holding']:
+        print(f"  ✓ Successfully picked up {state['agent']['holding']}!")
+
+    print(f"\n3. Moving with object...")
+    obs, reward, _, _, info = env.step(Action.FORWARD)
+    state = env.get_state_dict()
+    print(f"  Agent: {state['agent']['cell_id']} (still holding {state['agent']['holding']})")
+
+    print(f"\n4. Dropping object...")
+    obs, reward, _, _, info = env.step(Action.DROP)
+    state = env.get_state_dict()
+    print(f"  Holding: {state['agent']['holding']}")
+    print(f"  ✓ Object dropped at agent's location!")
+
+
+def distance_calculation_example():
+    """Demonstrate distance calculations on different tilings."""
+    print("\n" + "=" * 60)
+    print("DISTANCE CALCULATION EXAMPLE")
+    print("=" * 60)
+
+    for tiling_name in ["square", "hex", "triangle"]:
+        from multigrid.tilings import SquareTiling, HexTiling, TriangleTiling
+
+        tiling_class = {
+            "square": SquareTiling,
+            "hex": HexTiling,
+            "triangle": TriangleTiling
+        }[tiling_name]
+
+        tiling = tiling_class()
+        tiling.generate_graph(10, 10, seed=0)
+
+        # Calculate distance between two cells
+        cell_ids = list(tiling.cells.keys())
+        cell_a = cell_ids[10]
+        cell_b = cell_ids[50]
+
+        distance = tiling.distance(cell_a, cell_b)
+
+        print(f"\n{tiling_name.upper()} TILING:")
+        print(f"  Distance from {cell_a} to {cell_b}: {distance} hops")
+
+        # Get coordinates
+        pos_a = tiling.cell_to_canonical(cell_a)
+        pos_b = tiling.cell_to_canonical(cell_b)
+        print(f"  Canonical positions: {pos_a} -> {pos_b}")
+
+
+def main():
+    """Run all examples."""
+    print("\n" + "#" * 60)
+    print("# MultiGrid v1.1 - Usage Examples")
+    print("#" * 60)
+
+    basic_example()
+    multi_tiling_example()
+    object_interaction_example()
+    distance_calculation_example()
+
+    print("\n" + "#" * 60)
+    print("# All examples completed successfully!")
+    print("#" * 60)
+    print("\nTo run tests: python -m pytest tests/ -v")
+    print("To visualize: python visualize_grid.py")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/v1_1/grid_visualization_hex.png b/src/v1_1/grid_visualization_hex.png
new file mode 100644
index 00000000..c415e678
Binary files /dev/null and b/src/v1_1/grid_visualization_hex.png differ
diff --git a/src/v1_1/grid_visualization_square.png b/src/v1_1/grid_visualization_square.png
new file mode 100644
index 00000000..d7c74b60
Binary files /dev/null and b/src/v1_1/grid_visualization_square.png differ
diff --git a/src/v1_1/grid_visualization_triangle.png b/src/v1_1/grid_visualization_triangle.png
new file mode 100644
index 00000000..a46cecc5
Binary files /dev/null and b/src/v1_1/grid_visualization_triangle.png differ
diff --git a/src/v1_1/gridworld/GRIDWORLD_BACKENDS.md b/src/v1_1/gridworld/GRIDWORLD_BACKENDS.md
new file mode 100644
index 00000000..dd7c3a00
--- /dev/null
+++ b/src/v1_1/gridworld/GRIDWORLD_BACKENDS.md
@@ -0,0 +1,575 @@
+# Gridworld Domain: Backend Reference
+
+This document describes the two gridworld backends available in MultiNet v1.1 for VLM/VLA evaluation on navigation and puzzle-solving tasks.
+
+## Overview
+
+The gridworld domain provides configurable puzzle environments where an agent must navigate, manipulate objects, and achieve goals. Two backend implementations are available:
+
+| Backend | Based On | Best For |
+|---------|----------|----------|
+| **MiniGridBackend** | gymnasium `minigrid` package | Standard square grid tasks, mature/tested |
+| **MultiGridBackend** | Custom implementation | Exotic tilings (hex, triangle), zones, teleporters |
+
+Both backends implement the same `AbstractGridBackend` interface, allowing seamless swapping for evaluation.
+
+---
+
+## MiniGridBackend
+
+### Description
+
+Wraps the gymnasium `minigrid` package (v3.0+), providing access to a mature, well-tested gridworld implementation. Recommended for standard square-grid puzzles.
+
+### Installation
+
+```bash
+pip install minigrid gymnasium
+```
+
+### Usage
+
+```python
+from gridworld.backends import MiniGridBackend
+from gridworld.task_spec import TaskSpecification
+
+# Load task specification
+spec = TaskSpecification.from_json("tasks/tier2/single_key_001.json")
+
+# Create and configure backend
+backend = MiniGridBackend(render_mode="rgb_array")
+backend.configure(spec)
+
+# Run episode
+obs, state, info = backend.reset(seed=42)
+
+for step in range(spec.max_steps):
+    action = policy(obs)  # Your policy here
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+
+    if terminated or truncated:
+        break
+
+backend.close()
+```
+
+### Supported Features
+
+| Feature | Support | Notes |
+|---------|---------|-------|
+| **Tilings** | | |
+| Square grid | ✓ | Standard 4-connected grid |
+| Hexagonal grid | ✗ | Not supported |
+| Triangle grid | ✗ | Not supported |
+| **Objects** | | |
+| Walls | ✓ | Impassable barriers |
+| Keys | ✓ | Colored, unlock matching doors |
+| Doors | ✓ | Locked/unlocked, colored |
+| Switches | ✓ | Via custom implementation |
+| Gates | ✓ | Via custom implementation |
+| Blocks (pushable) | ✓ | Can be pushed by agent |
+| Hazards (lava) | ✓ | Terminates episode |
+| Teleporters | ✗ | Not supported |
+| Zones | ✗ | Not supported |
+| **Features** | | |
+| Partial observability | ✓ | Agent sees limited view |
+| Full observability | ✓ | Agent sees entire grid |
+| Memory tasks | ✓ | Via MiniGrid environments |
+| RGB rendering | ✓ | High-quality sprites |
+
+### Action Space
+
+7 discrete actions (MiniGrid standard):
+
+| ID | Action | Description |
+|----|--------|-------------|
+| 0 | `turn_left` | Rotate 90° counter-clockwise |
+| 1 | `turn_right` | Rotate 90° clockwise |
+| 2 | `forward` | Move one cell in facing direction |
+| 3 | `pickup` | Pick up object in front |
+| 4 | `drop` | Drop held object |
+| 5 | `toggle` | Interact (open door, press switch) |
+| 6 | `done` | No-op / signal completion |
+
+### Rendering
+
+- Default observation: 64x64 RGB (configurable)
+- High-res render: Sprite-based, visually detailed
+- Partial observability: Shows only visible cells
+
+### Limitations
+
+- Square grids only
+- No zone/target area objects
+- No teleporter mechanics
+- Tied to MiniGrid's object set
+
+---
+
+## MultiGridBackend
+
+### Description
+
+Custom implementation supporting arbitrary grid topologies (square, hexagonal, triangle) with an extended object set. Built on a topology-agnostic adjacency graph that generalizes to any tiling pattern.
+
+### Usage
+
+```python
+from gridworld.backends import MultiGridBackend
+from gridworld.task_spec import TaskSpecification
+
+# Load task specification
+spec = TaskSpecification.from_json("tasks/tier2/single_key_001.json")
+
+# Create with exotic tiling
+backend = MultiGridBackend(
+    tiling="triangle",  # or "square", "hex"
+    render_mode="rgb_array"
+)
+backend.configure(spec)
+
+# Run episode (same interface as MiniGridBackend)
+obs, state, info = backend.reset(seed=42)
+
+for step in range(spec.max_steps):
+    action = policy(obs)
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+
+    if terminated or truncated:
+        break
+
+backend.close()
+```
+
+### Supported Features
+
+| Feature | Support | Notes |
+|---------|---------|-------|
+| **Tilings** | | |
+| Square grid | ✓ | 4-connected (N/E/S/W) |
+| Hexagonal grid | ✓ | 6-connected (pointy-top) |
+| Triangle grid | ✓ | 3-connected (within hex subdivision) |
+| **Objects** | | |
+| Walls | ✓ | Impassable barriers |
+| Keys | ✓ | Colored, unlock matching doors |
+| Doors | ✓ | Locked/unlocked, colored |
+| Switches | ✓ | Toggle/hold/one-shot modes |
+| Gates | ✓ | Controlled by switches |
+| Blocks (movable) | ✓ | Can be picked up or pushed |
+| Hazards | ✓ | Terminates episode (lava, spikes, etc.) |
+| Teleporters | ✓ | Linked pairs, cooldown support |
+| Zones | ✓ | Target areas (overlappable) |
+| **Features** | | |
+| Partial observability | ✗ | Planned for future |
+| Full observability | ✓ | Agent sees entire grid |
+| RGB rendering | ✓ | Vector-based (PIL) |
+
+### Action Space
+
+9 discrete actions (extended from MiniGrid):
+
+| ID | Action | Description |
+|----|--------|-------------|
+| 0 | `forward` | Move in facing direction |
+| 1 | `backward` | Move opposite to facing |
+| 2 | `turn_left` | Rotate counter-clockwise |
+| 3 | `turn_right` | Rotate clockwise |
+| 4 | `pickup` | Pick up object at/in front of agent |
+| 5 | `drop` | Drop held object |
+| 6 | `toggle` | Interact (unlock door with key, activate switch) |
+| 7 | `push` | Push object in facing direction |
+| 8 | `wait` | No-op |
+
+**Note:** When using MultiGridBackend through the standard 7-action interface, actions are mapped:
+- MiniGrid action 5 (toggle) → MultiGrid TOGGLE
+- MiniGrid action 6 (done) → MultiGrid WAIT
+
+### Tiling Types
+
+#### Square Tiling
+```
+┌───┬───┬───┐
+│   │   │   │
+├───┼───┼───┤    4 directions: N, E, S, W
+│   │ A │   │    Agent can face/move in 4 directions
+├───┼───┼───┤
+│   │   │   │
+└───┴───┴───┘
+```
+
+#### Hexagonal Tiling
+```
+   ╱╲   ╱╲
+  ╱  ╲ ╱  ╲
+ │    │    │     6 directions: N, NE, SE, S, SW, NW
+ │  A │    │     Agent can face/move in 6 directions
+  ╲  ╱ ╲  ╱
+   ╲╱   ╲╱
+```
+
+#### Triangle Tiling
+```
+    ╱╲
+   ╱  ╲
+  ╱ A  ╲         3 directions: edge0, edge1, edge2
+ ╱──────╲        Agent can face/move in 3 directions
+```
+
+Each hexagon is subdivided into 6 triangles, creating a denser navigation graph.
+
+### Object Types
+
+#### Key
+```python
+{
+    "id": "key_blue",
+    "type": "key",
+    "color": "blue",
+    "position": {"x": 0.3, "y": 0.5}
+}
+```
+- Can be picked up with PICKUP action
+- Used to unlock doors of matching color via TOGGLE
+- Optionally consumed on use (configurable via `rules.key_consumption`)
+
+#### Door
+```python
+{
+    "id": "door_blue",
+    "type": "door",
+    "color": "blue",
+    "position": {"x": 0.5, "y": 0.5},
+    "is_locked": true
+}
+```
+- Blocks movement when locked/closed
+- TOGGLE with matching key unlocks
+- TOGGLE again opens/closes (when unlocked)
+
+#### Switch
+```python
+{
+    "id": "switch_1",
+    "type": "switch",
+    "color": "yellow",
+    "position": {"x": 0.3, "y": 0.3},
+    "switch_type": "toggle",  // "toggle", "hold", or "one_shot"
+    "controls": ["gate_1", "gate_2"],
+    "initial_state": false
+}
+```
+- **toggle**: Each TOGGLE flips state
+- **hold**: Active only while agent stands on switch
+- **one_shot**: Can only be activated once
+
+#### Gate
+```python
+{
+    "id": "gate_1",
+    "type": "gate",
+    "color": "yellow",
+    "position": {"x": 0.5, "y": 0.5},
+    "is_open": false,
+    "controlled_by": ["switch_1"],
+    "require_all": false  // true = AND logic, false = OR logic
+}
+```
+- Opens/closes based on controlling switch states
+- Blocks movement when closed
+
+#### Hazard
+```python
+{
+    "id": "lava_1",
+    "type": "hazard",
+    "color": "red",
+    "position": {"x": 0.7, "y": 0.7},
+    "hazard_type": "lava",  // for rendering
+    "damage": 1.0
+}
+```
+- Agent can step on hazards
+- Terminates episode immediately
+
+#### Teleporter
+```python
+{
+    "id": "tele_1",
+    "type": "teleporter",
+    "color": "purple",
+    "position": {"x": 0.1, "y": 0.1},
+    "linked_to": "tele_2",
+    "cooldown": 1
+}
+```
+- Comes in linked pairs
+- Agent stepping on teleporter is transported to linked destination
+- Cooldown prevents immediate re-teleportation
+
+#### Zone
+```python
+{
+    "id": "target_zone",
+    "type": "zone",
+    "color": "cyan",
+    "position": {"x": 0.9, "y": 0.9},
+    "radius_hops": 1
+}
+```
+- Overlappable target area
+- Useful for goal regions, spawn areas, etc.
+
+#### Movable (Block/Box)
+```python
+{
+    "id": "box_1",
+    "type": "movable",
+    "color": "green",
+    "position": {"x": 0.5, "y": 0.5}
+}
+```
+- Can be picked up (PICKUP) or pushed (PUSH)
+- Blocks movement when in cell
+
+#### Wall
+```python
+{
+    "id": "wall_1",
+    "type": "wall",
+    "color": "grey",
+    "position": {"x": 0.5, "y": 0.5}
+}
+```
+- Impassable barrier
+- Cannot be picked up or pushed
+
+### Rendering
+
+- Observation: 64x64 RGB (for VLM input)
+- High-res render: 640x640 RGB (for visualization)
+- Vector-based rendering using PIL
+- Distinct visual for each object type
+
+### Coordinate System
+
+MultiGrid uses **canonical coordinates** (0.0 to 1.0) that map to grid cells:
+
+```python
+# Canonical (x, y) → Grid cell
+position = {"x": 0.3, "y": 0.5}  # 30% across, 50% down
+
+# The tiling converts this to the nearest cell
+cell_id = tiling.canonical_to_cell(0.3, 0.5)  # e.g., "sq_2_1"
+```
+
+This allows task specifications to be tiling-agnostic.
+
+---
+
+## Task Specification Format
+
+Both backends use the same JSON task specification format:
+
+```json
+{
+    "task_id": "puzzle_001",
+    "version": "1.0",
+    "seed": 42,
+    "difficulty_tier": 2,
+    "description": "Collect the blue key to unlock the door",
+
+    "maze": {
+        "dimensions": [8, 8],
+        "walls": [
+            {"x": 0, "y": 0}, {"x": 0, "y": 1}, ...
+        ],
+        "start": {"x": 1, "y": 1},
+        "goal": {"x": 6, "y": 6}
+    },
+
+    "mechanisms": {
+        "keys": [
+            {"id": "key_blue", "position": {"x": 3, "y": 4}, "color": "blue"}
+        ],
+        "doors": [
+            {"id": "door_blue", "position": {"x": 5, "y": 5},
+             "requires_key": "blue", "initial_state": "locked"}
+        ],
+        "switches": [],
+        "gates": [],
+        "blocks": [],
+        "hazards": [],
+        "teleporters": []
+    },
+
+    "rules": {
+        "key_consumption": true,
+        "switch_type": "toggle"
+    },
+
+    "goal": {
+        "type": "reach_position",
+        "target": {"x": 6, "y": 6}
+    },
+
+    "max_steps": 100
+}
+```
+
+### Goal Types
+
+| Type | Description | Parameters |
+|------|-------------|------------|
+| `reach_position` | Agent reaches target cell | `target: {x, y}` |
+| `collect_all` | Agent collects all specified items | `target_ids: [...]` |
+| `push_block_to` | Push blocks to target positions | `target_ids, target_positions` |
+| `survive_steps` | Survive for N steps | `steps: N` |
+
+---
+
+## Choosing a Backend
+
+### Use MiniGridBackend when:
+- Working with standard square grids
+- Need partial observability
+- Want mature, well-tested implementation
+- Using existing MiniGrid environments
+- Don't need zones or teleporters
+
+### Use MultiGridBackend when:
+- Need hexagonal or triangle grids
+- Need zone/target area objects
+- Need teleporter mechanics
+- Want extended action space (backward, push)
+- Building custom puzzle types
+
+### Factory Function
+
+```python
+from gridworld.backends import get_backend
+
+# Standard square grid
+backend = get_backend("minigrid", render_mode="rgb_array")
+
+# Custom with exotic tiling
+backend = get_backend("multigrid", tiling="hex", render_mode="rgb_array")
+```
+
+---
+
+## GridState
+
+Both backends return a `GridState` object providing backend-agnostic state access:
+
+```python
+@dataclass
+class GridState:
+    agent_position: tuple[int, int]  # Grid coordinates
+    agent_direction: int             # 0=right, 1=down, 2=left, 3=up
+    agent_carrying: Optional[str]    # ID of held object
+
+    step_count: int
+    max_steps: int
+    terminated: bool
+    truncated: bool
+    reward: float
+
+    open_doors: set[str]       # IDs of open doors
+    collected_keys: set[str]   # IDs of collected keys
+    active_switches: set[str]  # IDs of active switches
+    open_gates: set[str]       # IDs of open gates
+    block_positions: dict[str, tuple[int, int]]
+
+    goal_reached: bool
+```
+
+---
+
+## Difficulty Tiers
+
+Tasks are organized into difficulty tiers:
+
+| Tier | Description | Mechanisms |
+|------|-------------|------------|
+| 1 | Navigation | Walls only, pathfinding |
+| 2 | Linear Dependencies | Key → Door |
+| 3 | Multi-Mechanism | Keys + Doors + Switches + Gates |
+| 4 | Irreversibility | Pushable blocks, consumable items |
+| 5 | Hidden Information | Must infer rules, memory tasks |
+
+---
+
+## Example: Running Evaluation
+
+```python
+from gridworld.backends import get_backend
+from gridworld.task_spec import TaskSpecification
+from gridworld.runner import GridRunner
+
+# Load tasks
+tasks = [
+    TaskSpecification.from_json(f"tasks/tier{i}/puzzle_{j:03d}.json")
+    for i in range(1, 6)
+    for j in range(1, 4)
+]
+
+# Create runner
+runner = GridRunner(backend="minigrid", render_mode="rgb_array")
+
+# Evaluate
+results = []
+for spec in tasks:
+    result = runner.run_episode(spec, policy_fn=your_policy, seed=42)
+    results.append({
+        "task_id": spec.task_id,
+        "success": result.success,
+        "steps": result.steps_taken,
+        "reward": result.total_reward
+    })
+
+# Compute metrics
+success_rate = sum(r["success"] for r in results) / len(results)
+print(f"Success rate: {success_rate:.2%}")
+```
+
+---
+
+## Files Reference
+
+```
+src/v1_1/gridworld/
+├── __init__.py
+├── task_spec.py              # TaskSpecification dataclass
+├── task_parser.py            # JSON → environment parser
+├── actions.py                # Action space definitions
+├── custom_env.py             # CustomMiniGridEnv class
+├── backends/
+│   ├── __init__.py           # get_backend() factory
+│   ├── base.py               # AbstractGridBackend interface
+│   ├── minigrid_backend.py   # MiniGrid wrapper
+│   └── multigrid_backend.py  # MultiGrid adapter
+├── runner/
+│   └── grid_runner.py        # Episode execution
+├── envs/
+│   └── tier_envs.py          # Pre-configured environments
+└── tasks/                    # Sample task JSON files
+    ├── tier1/
+    ├── tier2/
+    ├── tier3/
+    ├── tier4/
+    └── tier5/
+
+src/v1_1/multigrid/
+├── __init__.py
+├── core.py                   # Cell, TilingGraph
+├── base.py                   # Tiling base class
+├── tilings.py                # Square, Hex, Triangle tilings
+├── agent.py                  # AgentState, Action enum
+├── world.py                  # WorldState, execute_action()
+├── goals.py                  # Goal predicates
+├── rendering.py              # PIL-based rendering
+├── env.py                    # MultiGridEnv (gymnasium compatible)
+└── objects/
+    ├── base.py               # WorldObj, ObjectRegistry
+    └── builtin.py            # All object types
+```
diff --git a/src/v1_1/gridworld/__init__.py b/src/v1_1/gridworld/__init__.py
new file mode 100644
index 00000000..904989de
--- /dev/null
+++ b/src/v1_1/gridworld/__init__.py
@@ -0,0 +1,52 @@
+"""
+MiniGrid/GridWorld Domain for MultiNet v1.1
+
+This module provides a complete gridworld evaluation domain with:
+- Task specification schema (JSON) for defining puzzles
+- Task parser that creates MiniGrid environments from specs
+- Backend abstraction for pluggable grid implementations
+- Episode runner for trajectory collection
+- Evaluation module following GenESIS patterns
+"""
+
+from .task_spec import (
+    Position,
+    KeySpec,
+    DoorSpec,
+    SwitchSpec,
+    GateSpec,
+    BlockSpec,
+    HazardSpec,
+    TeleporterSpec,
+    MazeLayout,
+    MechanismSet,
+    Rules,
+    GoalSpec,
+    TaskSpecification,
+)
+from .task_parser import TaskParser
+from .actions import MiniGridActions, ACTION_NAMES, ACTION_DESCRIPTIONS
+
+
+__all__ = [
+    # Task specification
+    "Position",
+    "KeySpec",
+    "DoorSpec",
+    "SwitchSpec",
+    "GateSpec",
+    "BlockSpec",
+    "HazardSpec",
+    "TeleporterSpec",
+    "MazeLayout",
+    "MechanismSet",
+    "Rules",
+    "GoalSpec",
+    "TaskSpecification",
+    # Parser
+    "TaskParser",
+    # Actions
+    "MiniGridActions",
+    "ACTION_NAMES",
+    "ACTION_DESCRIPTIONS",
+]
diff --git a/src/v1_1/gridworld/actions.py b/src/v1_1/gridworld/actions.py
new file mode 100644
index 00000000..2927831a
--- /dev/null
+++ b/src/v1_1/gridworld/actions.py
@@ -0,0 +1,112 @@
+"""
+MiniGrid Action Space Definitions
+
+Standard 7-action discrete space matching MiniGrid's default Actions enum.
+"""
+
+from enum import IntEnum
+from typing import Dict
+
+
+class MiniGridActions(IntEnum):
+    """MiniGrid action space (7 discrete actions)."""
+    TURN_LEFT = 0
+    TURN_RIGHT = 1
+    MOVE_FORWARD = 2
+    PICKUP = 3
+    DROP = 4
+    TOGGLE = 5  # Interact: open door, press switch, etc.
+    DONE = 6  # No-op / wait
+
+
+# Human-readable action names
+ACTION_NAMES: Dict[int, str] = {
+    0: "turn_left",
+    1: "turn_right",
+    2: "move_forward",
+    3: "pickup",
+    4: "drop",
+    5: "toggle",
+    6: "done",
+}
+
+# Detailed action descriptions for VLM prompts
+ACTION_DESCRIPTIONS: Dict[int, str] = {
+    0: "Turn left (rotate 90° counter-clockwise)",
+    1: "Turn right (rotate 90° clockwise)",
+    2: "Move forward (one cell in facing direction)",
+    3: "Pick up (grab object in front of agent)",
+    4: "Drop (release held object)",
+    5: "Toggle (interact with object in front: open/close door, press switch)",
+    6: "Done/Wait (no action, stay in place)",
+}
+
+# Short descriptions for compact formats
+ACTION_SHORT: Dict[int, str] = {
+    0: "Left",
+    1: "Right",
+    2: "Forward",
+    3: "Pickup",
+    4: "Drop",
+    5: "Toggle",
+    6: "Wait",
+}
+
+# Action space as dict for GenESIS format
+ACTION_SPACE_DICT: Dict[int, tuple] = {
+    0: ("Turn left", {0: "Rotate 90° counter-clockwise"}),
+    1: ("Turn right", {1: "Rotate 90° clockwise"}),
+    2: ("Move forward", {2: "Move one cell in facing direction"}),
+    3: ("Pick up", {3: "Grab object directly in front"}),
+    4: ("Drop", {4: "Release currently held object"}),
+    5: ("Toggle/Interact", {5: "Interact with door, switch, or object in front"}),
+    6: ("Done/Wait", {6: "No operation, stay in place"}),
+}
+
+# Navigation-only subset (Tier 1)
+NAVIGATION_ACTIONS = {
+    MiniGridActions.TURN_LEFT,
+    MiniGridActions.TURN_RIGHT,
+    MiniGridActions.MOVE_FORWARD,
+    MiniGridActions.DONE,
+}
+
+# Full action set (Tiers 2+)
+FULL_ACTIONS = set(MiniGridActions)
+
+
+def action_to_name(action: int) -> str:
+    """Convert action ID to human-readable name."""
+    return ACTION_NAMES.get(action, f"unknown_{action}")
+
+
+def name_to_action(name: str) -> int:
+    """Convert action name to ID."""
+    name_lower = name.lower().strip()
+    for action_id, action_name in ACTION_NAMES.items():
+        if action_name == name_lower:
+            return action_id
+    # Try partial matching
+    for action_id, action_name in ACTION_NAMES.items():
+        if name_lower in action_name or action_name in name_lower:
+            return action_id
+    raise ValueError(f"Unknown action name: {name}")
+
+
+def get_valid_actions(tier: int) -> set[int]:
+    """Get valid actions for a given difficulty tier."""
+    if tier == 1:
+        # Navigation only - no pickup, drop, or toggle needed
+        return NAVIGATION_ACTIONS
+    else:
+        # Full action space for tiers 2+
+        return FULL_ACTIONS
+
+
+def format_action_space_for_prompt(tier: int = 2) -> str:
+    """Format action space description for VLM prompts."""
+    valid_actions = get_valid_actions(tier)
+    lines = []
+    for action_id in sorted(valid_actions):
+        lines.append(f"  {action_id}: {ACTION_DESCRIPTIONS[action_id]}")
+    return "\n".join(lines)
diff --git a/src/v1_1/gridworld/backends/__init__.py b/src/v1_1/gridworld/backends/__init__.py
new file mode 100644
index 00000000..198ae7f8
--- /dev/null
+++ b/src/v1_1/gridworld/backends/__init__.py
@@ -0,0 +1,75 @@
+"""
+Backend Abstraction for Grid Environments
+
+Provides pluggable backend implementations for gridworld environments.
+
+Available Backends:
+    MiniGridBackend: Standard MiniGrid (gymnasium) implementation
+        - Square grid only
+        - Full mechanism set (keys, doors, switches, gates, blocks, hazards, teleporters)
+        - Partial observability: view cone + fog of war
+        - Well tested, production-ready
+
+    MultiGridBackend: Custom multigrid with exotic tilings
+        - Square, hexagonal, triangle, 3-4-6-4, 4-8-8 tilings
+        - Full mechanism set (keys, doors, switches, gates, hazards, teleporters, zones)
+        - Partial observability: view cone + fog of war (BFS-based on adjacency graph)
+
+Feature Comparison (see base.py for full table):
+    - MiniGrid: Best for standard square grid tasks, more mature/tested
+    - MultiGrid: Required for hex/triangle tilings or zones/teleporters
+
+Usage:
+    from gridworld.backends import get_backend
+
+    # Standard square grid
+    backend = get_backend("minigrid", render_mode="rgb_array")
+
+    # Exotic tilings (hex, triangle)
+    backend = get_backend("multigrid", tiling="triangle", render_mode="rgb_array")
+"""
+
+from .base import AbstractGridBackend, GridState
+from .minigrid_backend import MiniGridBackend
+
+# MultiGridBackend is optional - requires multigrid module
+try:
+    from .multigrid_backend import MultiGridBackend
+    _MULTIGRID_AVAILABLE = True
+except ImportError:
+    MultiGridBackend = None
+    _MULTIGRID_AVAILABLE = False
+
+__all__ = [
+    "AbstractGridBackend",
+    "GridState",
+    "MiniGridBackend",
+    "MultiGridBackend",
+]
+
+
+def get_backend(name: str, **kwargs) -> AbstractGridBackend:
+    """
+    Get a backend instance by name.
+
+    Args:
+        name: Backend name ("minigrid" or "multigrid")
+        **kwargs: Arguments passed to backend constructor
+
+    Returns:
+        Backend instance
+
+    Raises:
+        ValueError: If backend name is unknown or unavailable
+    """
+    if name == "minigrid":
+        return MiniGridBackend(**kwargs)
+    elif name == "multigrid":
+        if not _MULTIGRID_AVAILABLE:
+            raise ValueError(
+                "MultiGridBackend not available. "
+                "Ensure multigrid module is accessible."
+            )
+        return MultiGridBackend(**kwargs)
+    else:
+        raise ValueError(f"Unknown backend: {name}")
diff --git a/src/v1_1/gridworld/backends/base.py b/src/v1_1/gridworld/backends/base.py
new file mode 100644
index 00000000..ed8ff0f6
--- /dev/null
+++ b/src/v1_1/gridworld/backends/base.py
@@ -0,0 +1,292 @@
+"""
+Abstract Base Class for Grid Backends
+
+Defines the interface that all grid environment backends must implement.
+This allows swapping between MiniGrid (gymnasium) and custom MultiGrid implementations.
+
+BACKEND ABSTRACTION LAYER
+=========================
+
+This module provides a pluggable backend system for gridworld environments.
+Any grid implementation (MiniGrid, custom MultiGrid with square/hex/triangle tilings,
+or future backends) can be used with the same runner and evaluation pipeline.
+
+Architecture:
+    TaskSpecification (JSON)
+           │
+           ▼
+    ┌─────────────────────┐
+    │  AbstractGridBackend │ ◄── This interface
+    └─────────┬───────────┘
+         ┌────┴────┐
+         ▼         ▼
+    ┌─────────┐ ┌─────────────┐
+    │MiniGrid │ │ MultiGrid   │
+    │Backend  │ │ Backend     │
+    │(MVP)    │ │(Custom)     │
+    └─────────┘ └─────────────┘
+
+Usage:
+    # Option 1: Use MiniGridBackend (gymnasium-based, recommended for MVP)
+    from gridworld.backends import MiniGridBackend
+    backend = MiniGridBackend(render_mode="rgb_array")
+    backend.configure(task_spec)
+    obs, state, info = backend.reset(seed=42)
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+
+    # Option 2: Use MultiGridBackend (custom tilings: square, hex, triangle)
+    from gridworld.backends import MultiGridBackend
+    backend = MultiGridBackend(tiling="triangle", render_mode="rgb_array")
+    backend.configure(task_spec)
+    # ... same interface as above
+
+Implementing a New Backend:
+    1. Create a new class that inherits from AbstractGridBackend
+    2. Implement all abstract methods (see docstrings below)
+    3. The backend must:
+       - Accept TaskSpecification objects via configure()
+       - Return consistent GridState objects from reset() and step()
+       - Provide RGB observations via render()
+       - Support the 7-action MiniGrid action space (0-6)
+
+GridState:
+    The GridState dataclass provides a backend-agnostic snapshot of environment
+    state for evaluation and comparison. All backends must populate this correctly.
+
+Action Space:
+    All backends use the standard 7-action discrete space:
+    0: turn_left, 1: turn_right, 2: forward, 3: pickup, 4: drop, 5: toggle, 6: done/wait
+
+FEATURE COMPARISON
+==================
+
+The two backends have different feature support. Choose based on your needs:
+
+    Feature              | MiniGridBackend | MultiGridBackend
+    ---------------------|-----------------|------------------
+    Tilings:             |                 |
+      Square grid        | ✓               | ✓
+      Hexagonal grid     | ✗               | ✓
+      Triangle grid      | ✗               | ✓
+      3-4-6-4            | ✗               | ✓
+      4-8-8              | ✗               | ✓
+    Objects:             |                 |
+      Walls              | ✓               | ✓
+      Movable/Blocks     | ✓               | ✓
+      Keys               | ✓               | ✓
+      Doors              | ✓               | ✓
+      Switches           | ✓               | ✓
+      Gates              | ✓               | ✓
+      Hazards (Lava)     | ✓               | ✓
+      Teleporters        | ✓               | ✓
+      Zones (targets)    | ✗               | ✓
+    Features:            |                 |
+      Partial obs (cone) | ✓               | ✓
+      Fog of war         | ✓               | ✓
+      Mature/tested      | ✓               | ✗ (newer)
+
+    Recommendation:
+    - Use MiniGridBackend for standard square grid tasks (more mature)
+    - Use MultiGridBackend for exotic tilings (hex/triangle) or zones
+
+See Also:
+    - minigrid_backend.py: MiniGrid (gymnasium) implementation
+    - multigrid_backend.py: Custom MultiGrid implementation with exotic tilings
+"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Optional, Any
+
+import numpy as np
+
+from ..task_spec import TaskSpecification, Position
+
+
+@dataclass
+class GridState:
+    """
+    Represents the current state of a grid environment.
+
+    This is a backend-agnostic representation of the environment state
+    that can be used for evaluation and comparison.
+    """
+    # Agent state
+    agent_position: tuple[int, int]
+    agent_direction: int  # 0=right, 1=down, 2=left, 3=up
+    agent_carrying: Optional[str] = None  # ID or color of carried object
+
+    # Environment state
+    step_count: int = 0
+    max_steps: int = 100
+    terminated: bool = False
+    truncated: bool = False
+    reward: float = 0.0
+
+    # Mechanism states
+    open_doors: set[str] = field(default_factory=set)  # IDs of open doors
+    collected_keys: set[str] = field(default_factory=set)  # IDs of collected keys
+    active_switches: set[str] = field(default_factory=set)  # IDs of active switches
+    open_gates: set[str] = field(default_factory=set)  # IDs of open gates
+    block_positions: dict[str, tuple[int, int]] = field(default_factory=dict)  # block_id -> position
+    teleporter_cooldowns: dict[str, int] = field(default_factory=dict)  # teleporter_id -> cooldown
+
+    # Goal state
+    goal_reached: bool = False
+
+    # Observability state
+    observability_mode: str = "full"  # "full", "view_cone", "fog_of_war"
+    visible_cells: set[tuple[int, int]] = field(default_factory=set)  # Currently visible cells
+    explored_cells: set[tuple[int, int]] = field(default_factory=set)  # All ever-seen cells (fog_of_war)
+
+    def to_dict(self) -> dict:
+        """Convert state to dictionary for serialization."""
+        return {
+            "agent_position": list(self.agent_position),
+            "agent_direction": self.agent_direction,
+            "agent_carrying": self.agent_carrying,
+            "step_count": self.step_count,
+            "max_steps": self.max_steps,
+            "terminated": self.terminated,
+            "truncated": self.truncated,
+            "reward": self.reward,
+            "open_doors": list(self.open_doors),
+            "collected_keys": list(self.collected_keys),
+            "active_switches": list(self.active_switches),
+            "open_gates": list(self.open_gates),
+            "block_positions": {k: list(v) for k, v in self.block_positions.items()},
+            "teleporter_cooldowns": self.teleporter_cooldowns,
+            "goal_reached": self.goal_reached,
+            "observability_mode": self.observability_mode,
+            "visible_cells": [list(c) for c in self.visible_cells],
+            "explored_cells": [list(c) for c in self.explored_cells],
+        }
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "GridState":
+        """Create state from dictionary."""
+        return cls(
+            agent_position=tuple(d["agent_position"]),
+            agent_direction=d["agent_direction"],
+            agent_carrying=d.get("agent_carrying"),
+            step_count=d.get("step_count", 0),
+            max_steps=d.get("max_steps", 100),
+            terminated=d.get("terminated", False),
+            truncated=d.get("truncated", False),
+            reward=d.get("reward", 0.0),
+            open_doors=set(d.get("open_doors", [])),
+            collected_keys=set(d.get("collected_keys", [])),
+            active_switches=set(d.get("active_switches", [])),
+            open_gates=set(d.get("open_gates", [])),
+            block_positions={k: tuple(v) for k, v in d.get("block_positions", {}).items()},
+            teleporter_cooldowns=d.get("teleporter_cooldowns", {}),
+            goal_reached=d.get("goal_reached", False),
+            observability_mode=d.get("observability_mode", "full"),
+            visible_cells={tuple(c) for c in d.get("visible_cells", [])},
+            explored_cells={tuple(c) for c in d.get("explored_cells", [])},
+        )
+
+
+class AbstractGridBackend(ABC):
+    """
+    Abstract interface for grid environment backends.
+
+    Implementations provide the actual environment logic while
+    maintaining a consistent interface for the runner and evaluation.
+    """
+
+    def __init__(self):
+        self.task_spec: Optional[TaskSpecification] = None
+        self._configured = False
+
+    @abstractmethod
+    def configure(self, task_spec: TaskSpecification) -> None:
+        """
+        Configure the backend with a task specification.
+
+        Args:
+            task_spec: The task specification defining the puzzle
+        """
+        pass
+
+    @abstractmethod
+    def reset(self, seed: Optional[int] = None) -> tuple[np.ndarray, GridState, dict]:
+        """
+        Reset the environment to initial state.
+
+        Args:
+            seed: Random seed for reproducibility
+
+        Returns:
+            observation: The initial observation (RGB image)
+            state: The initial GridState
+            info: Additional information dictionary
+        """
+        pass
+
+    @abstractmethod
+    def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, GridState, dict]:
+        """
+        Execute one action in the environment.
+
+        Args:
+            action: The action to execute (0-6 for MiniGrid actions)
+
+        Returns:
+            observation: The new observation (RGB image)
+            reward: The reward for this step
+            terminated: Whether the episode ended (goal reached or failed)
+            truncated: Whether the episode was cut short (max steps)
+            state: The new GridState
+            info: Additional information dictionary
+        """
+        pass
+
+    @abstractmethod
+    def render(self) -> np.ndarray:
+        """
+        Render the current environment state.
+
+        Returns:
+            RGB image array of shape (H, W, 3)
+        """
+        pass
+
+    @abstractmethod
+    def get_mission_text(self) -> str:
+        """
+        Get the mission/goal description text.
+
+        Returns:
+            Human-readable mission description
+        """
+        pass
+
+    @abstractmethod
+    def get_state(self) -> GridState:
+        """
+        Get the current environment state.
+
+        Returns:
+            Current GridState
+        """
+        pass
+
+    @property
+    def is_configured(self) -> bool:
+        """Whether the backend has been configured with a task spec."""
+        return self._configured
+
+    @property
+    def action_space_size(self) -> int:
+        """Size of the action space (7 for MiniGrid)."""
+        return 7
+
+    @property
+    def observation_shape(self) -> tuple[int, int, int]:
+        """Shape of observations (H, W, C)."""
+        return (64, 64, 3)  # Default, can be overridden
+
+    def close(self) -> None:
+        """Clean up resources."""
+        pass
diff --git a/src/v1_1/gridworld/backends/minigrid_backend.py b/src/v1_1/gridworld/backends/minigrid_backend.py
new file mode 100644
index 00000000..a1ca5981
--- /dev/null
+++ b/src/v1_1/gridworld/backends/minigrid_backend.py
@@ -0,0 +1,344 @@
+"""
+MiniGrid Backend Implementation
+
+Wraps the gymnasium MiniGrid environment with the AbstractGridBackend interface.
+"""
+
+from typing import Optional
+
+import numpy as np
+
+from ..task_spec import TaskSpecification
+from ..task_parser import TaskParser
+from ..custom_env import CustomMiniGridEnv
+from .base import AbstractGridBackend, GridState
+
+
+class MiniGridBackend(AbstractGridBackend):
+    """
+    Backend implementation using gymnasium's MiniGrid package.
+
+    This is the MVP backend that wraps MiniGrid environments and
+    provides the standard AbstractGridBackend interface.
+    """
+
+    def __init__(self, render_mode: Optional[str] = "rgb_array"):
+        """
+        Initialize the MiniGrid backend.
+
+        Args:
+            render_mode: Rendering mode ("human", "rgb_array", or None)
+        """
+        super().__init__()
+        self.render_mode = render_mode
+        self.parser = TaskParser(render_mode=render_mode)
+        self.env: Optional[CustomMiniGridEnv] = None
+        self._last_obs = None
+
+    def configure(self, task_spec: TaskSpecification) -> None:
+        """
+        Configure the backend with a task specification.
+
+        Args:
+            task_spec: The task specification defining the puzzle
+        """
+        self.task_spec = task_spec
+        self._configured = True
+        # Environment will be created on reset
+
+    def reset(self, seed: Optional[int] = None) -> tuple[np.ndarray, GridState, dict]:
+        """
+        Reset the environment to initial state.
+
+        This method creates a fresh environment from the configured task specification.
+        It leverages the TaskParser to handle environment creation and grid population.
+
+        IMPORTANT DESIGN NOTE - Why we don't call env.reset() here:
+        The TaskParser.parse() method internally calls env.reset() to initialize the
+        grid structure, then populates it with task-specific objects. If we were to
+        call reset() again here, it would wipe out all the carefully placed objects
+        (keys, doors, switches, etc.) and leave us with an empty grid!
+
+        This is a deliberate architectural choice:
+        - TaskParser handles: environment creation + reset + population
+        - Backend reset() handles: triggering parser + extracting observations/state
+
+        Args:
+            seed: Random seed for reproducibility. Passed through to the parser
+                  to ensure deterministic environment initialization.
+
+        Returns:
+            observation: The initial RGB observation (image array)
+            state: The initial GridState containing agent position, mechanism states, etc.
+            info: Additional information dictionary (currently empty, for future use)
+
+        Raises:
+            RuntimeError: If configure() has not been called before reset()
+        """
+        if not self._configured:
+            raise RuntimeError("Backend must be configured before reset")
+
+        # Create fresh environment from task spec
+        # CRITICAL: parser.parse() internally calls env.reset() and populates the grid.
+        # We must NOT call reset() again here or it will wipe out all objects!
+        self.env = self.parser.parse(self.task_spec, seed=seed)
+
+        # Generate observation (env is already reset and populated by parser)
+        obs = self.env.gen_obs()
+        info = {}
+
+        # Get RGB observation
+        # MiniGrid supports two rendering modes: direct RGB or symbolic observation
+        if self.render_mode == "rgb_array":
+            # Use environment's built-in renderer for high-quality RGB output
+            rgb_obs = self.env.render()
+        else:
+            # Convert symbolic observation to RGB
+            rgb_obs = self._obs_to_rgb(obs)
+
+        # Cache observation for later render() calls
+        self._last_obs = rgb_obs
+
+        # Extract backend-agnostic GridState for evaluation
+        state = self._get_grid_state()
+
+        # Include partial observation data in info
+        obs_mode = self.task_spec.rules.observability if self.task_spec else "full"
+        if obs_mode != "full":
+            info["partial_obs"] = obs  # The MiniGrid symbolic partial observation
+
+        return rgb_obs, state, info
+
+    def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, GridState, dict]:
+        """
+        Execute one action in the environment.
+
+        Args:
+            action: The action to execute (0-6 for MiniGrid actions)
+
+        Returns:
+            observation: The new observation (RGB image)
+            reward: The reward for this step
+            terminated: Whether the episode ended
+            truncated: Whether the episode was cut short
+            state: The new GridState
+            info: Additional information dictionary
+        """
+        if self.env is None:
+            raise RuntimeError("Environment not initialized. Call reset() first.")
+
+        # Execute action
+        obs, reward, terminated, truncated, info = self.env.step(action)
+
+        # Update fog-of-war explored cells after movement
+        obs_mode = self.task_spec.rules.observability if self.task_spec else "full"
+        if obs_mode in ("view_cone", "fog_of_war"):
+            self.env.update_explored()
+
+        # Get RGB observation
+        if self.render_mode == "rgb_array":
+            rgb_obs = self.env.render()
+        else:
+            rgb_obs = self._obs_to_rgb(obs)
+
+        self._last_obs = rgb_obs
+        state = self._get_grid_state()
+        state.terminated = terminated
+        state.truncated = truncated
+        state.reward = reward
+        state.goal_reached = terminated and reward > 0
+
+        return rgb_obs, reward, terminated, truncated, state, info
+
+    def render(self) -> np.ndarray:
+        """
+        Render the current environment state.
+
+        Returns:
+            RGB image array of shape (H, W, 3)
+        """
+        if self.env is None:
+            raise RuntimeError("Environment not initialized. Call reset() first.")
+
+        if self.render_mode == "rgb_array":
+            return self.env.render()
+        elif self._last_obs is not None:
+            return self._last_obs
+        else:
+            # Return placeholder
+            return np.zeros((64, 64, 3), dtype=np.uint8)
+
+    def get_mission_text(self) -> str:
+        """
+        Get the mission/goal description text.
+
+        Returns:
+            Human-readable mission description
+        """
+        if self.env is not None:
+            return self.env.mission
+        elif self.task_spec is not None:
+            return self.task_spec.get_mission_text()
+        return "Navigate to the goal"
+
+    def get_state(self) -> GridState:
+        """
+        Get the current environment state.
+
+        Returns:
+            Current GridState
+        """
+        return self._get_grid_state()
+
+    def _get_grid_state(self) -> GridState:
+        """
+        Extract GridState from current environment state.
+
+        This method creates a backend-agnostic representation of the current
+        environment state by inspecting the CustomMiniGridEnv and extracting
+        all relevant information into a standardized GridState object.
+
+        The GridState abstraction allows evaluation code to work with any backend
+        (MiniGrid, MultiGrid, or future implementations) without backend-specific
+        knowledge.
+
+        State Extraction Process:
+        1. Agent state: position, direction, held object
+        2. Mechanism states: switches (active/inactive), gates (open/closed)
+        3. Block positions: locate all blocks by grid scan
+        4. Goal state: check if agent reached goal position
+
+        Performance Note:
+        Block position tracking requires a full grid scan (O(width * height) per block).
+        This is acceptable for small grids (8x8 to 32x32) but could be optimized
+        for larger environments by maintaining a position cache.
+
+        Returns:
+            GridState object with current environment state, or a default empty
+            state if the environment is not initialized.
+        """
+        # Return empty state if environment not initialized
+        if self.env is None:
+            return GridState(
+                agent_position=(0, 0),
+                agent_direction=0,
+            )
+
+        # Extract agent carrying information
+        # The agent can carry keys or other objects. We extract the color for keys,
+        # or a string representation for other object types.
+        carrying = None
+        if self.env.carrying is not None:
+            # Try to get color attribute (for keys), fall back to string representation
+            carrying = getattr(self.env.carrying, "color", str(self.env.carrying))
+
+        # Initialize mechanism state tracking containers
+        open_doors = set()  # Currently unused but reserved for future door state tracking
+        collected_keys = set()  # Currently unused but reserved for key collection tracking
+        active_switches = set()  # IDs of switches that are currently activated
+        open_gates = set()  # IDs of gates that are currently open (passable)
+        block_positions = {}  # Maps block_id -> (x, y) position
+
+        # Track switch states
+        # Switches can be toggled on/off to control gates
+        for switch_id, switch in self.env.switches.items():
+            if switch.is_active:
+                active_switches.add(switch_id)
+
+        # Track gate states
+        # Gates can be open (passable) or closed (blocking)
+        for gate_id, gate in self.env.gates.items():
+            if gate.is_open:
+                open_gates.add(gate_id)
+
+        # Track block positions
+        # Blocks can be pushed around, so we need to locate them in the grid.
+        # This requires scanning the entire grid for each block.
+        # TODO: Consider maintaining a position cache to avoid O(N*W*H) complexity
+        for block_id, block in self.env.blocks.items():
+            # Find block position by scanning grid
+            found = False
+            for x in range(self.env.width):
+                for y in range(self.env.height):
+                    cell = self.env.grid.get(x, y)
+                    if cell is block:
+                        block_positions[block_id] = (x, y)
+                        found = True
+                        break  # Exit inner loop
+                if found:
+                    break  # Exit outer loop
+
+        # Track teleporter cooldown states
+        teleporter_cooldowns = {}
+        for tp_id, tp in self.env.teleporters.items():
+            teleporter_cooldowns[tp_id] = tp.cooldown
+
+        # Check if goal has been reached
+        # Goal is reached when agent position matches goal position from task spec
+        goal_reached = False
+        if self.task_spec is not None:
+            goal_pos = self.task_spec.maze.goal.to_tuple()
+            goal_reached = self.env.agent_pos == goal_pos
+
+        # Get observability info
+        obs_mode = self.task_spec.rules.observability if self.task_spec else "full"
+        visible_cells = set()
+        explored_cells = set()
+        if obs_mode != "full":
+            visible_cells = self.env.get_visible_cells()
+            explored_cells = set(self.env.explored_cells)
+
+        # Construct and return the GridState
+        return GridState(
+            agent_position=self.env.agent_pos,
+            agent_direction=self.env.agent_dir,
+            agent_carrying=carrying,
+            step_count=self.env.step_count,
+            max_steps=self.env.max_steps,
+            open_doors=open_doors,
+            collected_keys=collected_keys,
+            active_switches=active_switches,
+            open_gates=open_gates,
+            block_positions=block_positions,
+            teleporter_cooldowns=teleporter_cooldowns,
+            goal_reached=goal_reached,
+            observability_mode=obs_mode,
+            visible_cells=visible_cells,
+            explored_cells=explored_cells,
+        )
+
+    def _obs_to_rgb(self, obs: dict) -> np.ndarray:
+        """
+        Convert MiniGrid observation to RGB image.
+
+        Args:
+            obs: MiniGrid observation dict
+
+        Returns:
+            RGB image array
+        """
+        if isinstance(obs, dict) and "image" in obs:
+            # Symbolic observation - need to render
+            return self.env.render() if self.env else np.zeros((64, 64, 3), dtype=np.uint8)
+        elif isinstance(obs, np.ndarray):
+            if obs.shape[-1] == 3:
+                return obs.astype(np.uint8)
+            else:
+                # Symbolic grid observation
+                return self.env.render() if self.env else np.zeros((64, 64, 3), dtype=np.uint8)
+        else:
+            return self.env.render() if self.env else np.zeros((64, 64, 3), dtype=np.uint8)
+
+    @property
+    def observation_shape(self) -> tuple[int, int, int]:
+        """Shape of rendered observations."""
+        if self.env is not None:
+            img = self.env.render()
+            return img.shape
+        return (64, 64, 3)
+
+    def close(self) -> None:
+        """Clean up resources."""
+        if self.env is not None:
+            self.env.close()
+            self.env = None
diff --git a/src/v1_1/gridworld/backends/multigrid_backend.py b/src/v1_1/gridworld/backends/multigrid_backend.py
new file mode 100644
index 00000000..0903d4da
--- /dev/null
+++ b/src/v1_1/gridworld/backends/multigrid_backend.py
@@ -0,0 +1,477 @@
+# gridworld/backends/multigrid_backend.py
+
+"""
+MultiGrid Backend Implementation
+
+Adapter for the custom MultiGrid system (src/v1_1/multigrid/) that implements
+the AbstractGridBackend interface. This allows evaluation of custom tilings
+(square, hex, triangle) using the same pipeline as MiniGrid.
+
+Usage:
+    from gridworld.backends import MultiGridBackend
+
+    # Use with triangle tiling
+    backend = MultiGridBackend(tiling="triangle", render_mode="rgb_array")
+    backend.configure(task_spec)
+    obs, state, info = backend.reset(seed=42)
+    obs, reward, terminated, truncated, state, info = backend.step(action)
+"""
+
+import sys
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+
+from .base import AbstractGridBackend, GridState
+from ..task_spec import TaskSpecification
+
+# Add parent directory to path for multigrid imports
+_multigrid_path = Path(__file__).parent.parent.parent / "multigrid"
+if str(_multigrid_path.parent) not in sys.path:
+    sys.path.insert(0, str(_multigrid_path.parent))
+
+
+class MultiGridBackend(AbstractGridBackend):
+    """
+    Backend adapter for the custom MultiGrid system.
+
+    Supports exotic tilings: square, hex, triangle.
+
+    Args:
+        tiling: Tiling type ("square", "hex", "triangle")
+        render_mode: Render mode ("rgb_array" or "human")
+        render_width: Width of rendered image (default 640)
+        render_height: Height of rendered image (default 640)
+    """
+
+    def __init__(
+        self,
+        tiling: str = "square",
+        render_mode: str = "rgb_array",
+        render_width: int = 640,
+        render_height: int = 640,
+    ):
+        super().__init__()
+        self.tiling_type = tiling
+        self.render_mode = render_mode
+        self.render_width = render_width
+        self.render_height = render_height
+
+        # Will be initialized on configure()
+        self.env = None
+        self._step_count = 0
+        self._max_steps = 100
+
+    def configure(self, task_spec: TaskSpecification) -> None:
+        """
+        Configure the backend with a task specification.
+
+        Converts the TaskSpecification to the multigrid format and creates
+        the environment.
+
+        Args:
+            task_spec: The task specification defining the puzzle
+        """
+        self.task_spec = task_spec
+
+        # Convert TaskSpecification to multigrid task_spec dict
+        multigrid_spec = self._convert_task_spec(task_spec)
+
+        # Extract observability settings from task_spec
+        obs_mode = task_spec.rules.observability if task_spec.rules else "full"
+        view_size = task_spec.rules.view_size if task_spec.rules else 7
+        partial = obs_mode != "full"
+
+        # Import and create MultiGridEnv
+        from multigrid.env import MultiGridEnv
+
+        self.env = MultiGridEnv(
+            task_spec=multigrid_spec,
+            tiling=self.tiling_type,
+            render_mode=self.render_mode,
+            partial_obs=partial,
+            obs_radius=view_size // 2,
+            observability_mode=obs_mode,
+        )
+
+        self._max_steps = task_spec.max_steps
+        self._configured = True
+
+    def _convert_task_spec(self, spec: TaskSpecification) -> dict:
+        """
+        Convert TaskSpecification to multigrid task_spec dict format.
+
+        This method bridges the gap between the standard MiniGrid TaskSpecification
+        format (used for consistency across backends) and the MultiGrid-specific
+        format required by the custom MultiGrid environment.
+
+        Key Differences Between Formats:
+        1. Coordinate System:
+           - MiniGrid: Integer grid coordinates (e.g., x=3, y=5)
+           - MultiGrid: Normalized [0,1] coordinates (e.g., x=0.375, y=0.625)
+
+        2. Object Representation:
+           - MiniGrid: Separate mechanism types (keys, doors, blocks)
+           - MultiGrid: Unified "objects" list with type field
+
+        3. Tiling Support:
+           - MiniGrid: Implicit square tiling
+           - MultiGrid: Explicit tiling type (square, hex, triangle)
+
+        Translation Strategy:
+        - Keys → "movable" objects (can be picked up)
+        - Doors → "wall" objects with color (blocking barriers)
+        - Blocks → "movable" objects (pushable)
+        - Switches/Gates → Not yet implemented in MultiGrid backend
+        - Positions → Normalized by dividing by grid dimensions
+
+        Note on Coordinate Normalization:
+        MultiGrid uses normalized [0,1] coordinates to support different tilings
+        uniformly. For example, in an 8x8 grid, position (4, 4) becomes (0.5, 0.5).
+        This allows the same task to be rendered on square, hex, or triangle grids.
+
+        Args:
+            spec: TaskSpecification from the minigrid module (standard format)
+
+        Returns:
+            Dictionary in multigrid format ready for MultiGridEnv initialization
+
+        Limitations:
+            - Switches and gates are not yet supported (MultiGrid enhancement needed)
+            - Teleporters not implemented
+            - Hazards not implemented
+            - All objects except goal are treated as "movable" or "wall"
+        """
+        # Build walls list from maze layout
+        # Walls are kept in absolute coordinates as MultiGrid handles them specially
+        walls = [[w.x, w.y] for w in spec.maze.walls]
+
+        # Build scene objects list
+        # All interactive objects are collected here with unified format
+        objects = []
+
+        # Add keys as movable objects
+        # Keys can be picked up and carried by the agent
+        for key in spec.mechanisms.keys:
+            objects.append({
+                "id": key.id,
+                "type": "movable",
+                "color": key.color,
+                # Normalize position to [0,1] range for MultiGrid
+                "position": {"x": key.position.x / spec.maze.dimensions[0],
+                            "y": key.position.y / spec.maze.dimensions[1]}
+            })
+
+        # Add doors as walls (or special handling)
+        # Doors are treated as colored walls in the current MultiGrid implementation
+        # TODO: Enhance MultiGrid to support door unlocking mechanics
+        for door in spec.mechanisms.doors:
+            objects.append({
+                "id": door.id,
+                "type": "wall",  # Doors are blocking barriers
+                "color": door.requires_key,  # Color indicates which key unlocks it
+                "position": {"x": door.position.x / spec.maze.dimensions[0],
+                            "y": door.position.y / spec.maze.dimensions[1]}
+            })
+
+        # Add blocks as movable objects
+        # Blocks can be pushed by the agent (Sokoban-style)
+        for block in spec.mechanisms.blocks:
+            objects.append({
+                "id": block.id,
+                "type": "movable",
+                "color": "grey",  # Default block color
+                "position": {"x": block.position.x / spec.maze.dimensions[0],
+                            "y": block.position.y / spec.maze.dimensions[1]}
+            })
+
+        # Build goal specification
+        # MultiGrid supports multiple goal types with different win conditions
+        goal_spec = {}
+        if spec.goal:
+            if spec.goal.goal_type == "reach_position":
+                # Win by reaching a specific position
+                goal_spec = {
+                    "type": "reach_position",
+                    "target": {
+                        "x": spec.goal.target.x / spec.maze.dimensions[0],
+                        "y": spec.goal.target.y / spec.maze.dimensions[1]
+                    }
+                }
+            elif spec.goal.goal_type == "collect_all":
+                # Win by collecting all specified objects
+                goal_spec = {
+                    "type": "collect_all",
+                    "target_ids": spec.goal.target_ids
+                }
+            elif spec.goal.goal_type == "push_block_to":
+                # Win by pushing blocks to target positions (Sokoban-style)
+                goal_spec = {
+                    "type": "push_block_to",
+                    "target_ids": spec.goal.target_ids,
+                    "target_positions": [
+                        {"x": p.x / spec.maze.dimensions[0],
+                         "y": p.y / spec.maze.dimensions[1]}
+                        for p in spec.goal.target_positions
+                    ] if spec.goal.target_positions else []
+                }
+
+        # Construct complete MultiGrid task specification
+        return {
+            "task_id": spec.task_id,
+            "seed": spec.seed,
+            "tiling": {
+                "type": self.tiling_type,  # square, hex, or triangle
+                "grid_size": {
+                    "width": spec.maze.dimensions[0],
+                    "height": spec.maze.dimensions[1]
+                }
+            },
+            "scene": {
+                "agent": {
+                    "position": {
+                        # Agent start position in normalized coordinates
+                        "x": spec.maze.start.x / spec.maze.dimensions[0],
+                        "y": spec.maze.start.y / spec.maze.dimensions[1]
+                    },
+                    "facing": 0  # Default direction (right)
+                },
+                "objects": objects,
+                "walls": walls
+            },
+            "goal": goal_spec,
+            "limits": {
+                "max_steps": spec.max_steps
+            }
+        }
+
+    def reset(self, seed: Optional[int] = None) -> tuple[np.ndarray, GridState, dict]:
+        """
+        Reset the environment to initial state.
+
+        Args:
+            seed: Random seed for reproducibility
+
+        Returns:
+            observation: The initial observation (RGB image)
+            state: The initial GridState
+            info: Additional information dictionary
+        """
+        if not self._configured or self.env is None:
+            raise RuntimeError("Backend must be configured before reset")
+
+        obs, info = self.env.reset(seed=seed)
+        self._step_count = 0
+
+        state = self._build_grid_state()
+
+        return obs, state, info
+
+    def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, GridState, dict]:
+        """
+        Execute one action in the environment.
+
+        This method provides the bridge between the standard MiniGrid action space
+        (used for consistency across backends) and the MultiGrid-specific action
+        indices. The mapping ensures that the same agent policy can work with both
+        backends without modification.
+
+        Action Space Translation:
+        MiniGrid uses a 7-action discrete space (0-6), while MultiGrid has a
+        different internal action enumeration. This method translates between them:
+
+        MiniGrid Action → MultiGrid Action
+        0: turn_left     → 2: TURN_LEFT
+        1: turn_right    → 3: TURN_RIGHT
+        2: forward       → 0: FORWARD
+        3: pickup        → 4: PICKUP
+        4: drop          → 5: DROP
+        5: toggle        → 6: PUSH (closest equivalent for switch/door interaction)
+        6: done/wait     → 7: WAIT
+
+        Note on "toggle" vs "PUSH":
+        MiniGrid's "toggle" action is used for switches, doors, and other interactive
+        objects. MultiGrid's closest equivalent is "PUSH", which can interact with
+        objects in front of the agent. This mapping may need refinement as MultiGrid
+        adds more interaction mechanics.
+
+        Design Rationale:
+        The action mapping allows evaluation code to use standard MiniGrid action
+        indices regardless of backend. This is critical for:
+        - Running the same agent policy on different backends
+        - Comparing results across backends
+        - Using pre-trained models that expect MiniGrid actions
+
+        Args:
+            action: The action to execute (0-6, standard MiniGrid action space)
+
+        Returns:
+            observation: RGB image of the new state
+            reward: Reward for this step
+            terminated: Whether the episode ended (goal reached or failure)
+            truncated: Whether the episode was cut short (max steps reached)
+            state: GridState representing the new environment state
+            info: Additional information dictionary from the environment
+
+        Raises:
+            RuntimeError: If the backend has not been configured or reset
+        """
+        if not self._configured or self.env is None:
+            raise RuntimeError("Backend must be configured before step")
+
+        # Map MiniGrid action to MultiGrid action
+        # This translation ensures compatibility between backends
+        action_map = {
+            0: 2,  # turn_left -> TURN_LEFT
+            1: 3,  # turn_right -> TURN_RIGHT
+            2: 0,  # forward -> FORWARD
+            3: 4,  # pickup -> PICKUP
+            4: 5,  # drop -> DROP
+            5: 6,  # toggle -> PUSH (closest equivalent)
+            6: 7,  # done -> WAIT
+        }
+
+        # Get MultiGrid action index, default to WAIT if action invalid
+        multigrid_action = action_map.get(action, 7)
+
+        # Execute action in MultiGrid environment
+        obs, reward, terminated, truncated, info = self.env.step(multigrid_action)
+
+        # Track step count (MultiGrid doesn't track this internally)
+        self._step_count += 1
+
+        # Build GridState for backend-agnostic representation
+        state = self._build_grid_state()
+        # Update state with step results
+        state.terminated = terminated
+        state.truncated = truncated
+        state.reward = reward
+        state.step_count = self._step_count
+
+        return obs, reward, terminated, truncated, state, info
+
+    def render(self) -> np.ndarray:
+        """
+        Render the current environment state.
+
+        Returns:
+            RGB image array of shape (H, W, 3)
+        """
+        if self.env is None:
+            return np.zeros((self.render_height, self.render_width, 3), dtype=np.uint8)
+
+        return self.env.render()
+
+    def get_mission_text(self) -> str:
+        """
+        Get the mission/goal description text.
+
+        Returns:
+            Human-readable mission description
+        """
+        if self.task_spec is None:
+            return "No mission"
+
+        # Use task description or generate from goal
+        if self.task_spec.description:
+            return self.task_spec.description
+
+        if self.task_spec.goal:
+            goal_type = self.task_spec.goal.goal_type
+            if goal_type == "reach_position":
+                return f"Navigate to position ({self.task_spec.goal.target.x}, {self.task_spec.goal.target.y})"
+            elif goal_type == "collect_all":
+                return f"Collect all items: {', '.join(self.task_spec.goal.target_ids)}"
+            elif goal_type == "push_block_to":
+                return "Push blocks to target positions"
+
+        return "Complete the task"
+
+    def get_state(self) -> GridState:
+        """
+        Get the current environment state.
+
+        Returns:
+            Current GridState
+        """
+        return self._build_grid_state()
+
+    def _build_grid_state(self) -> GridState:
+        """
+        Build a GridState from the current MultiGrid state.
+
+        Returns:
+            GridState representing current environment
+        """
+        if self.env is None or self.env.state is None:
+            return GridState(
+                agent_position=(0, 0),
+                agent_direction=0,
+                step_count=self._step_count,
+                max_steps=self._max_steps,
+            )
+
+        state = self.env.state
+        tiling = self.env.tiling
+
+        # Get agent position in grid coordinates
+        agent_pos = tiling.cell_to_canonical(state.agent.cell_id)
+        grid_pos = (
+            int(agent_pos[0] * self.task_spec.maze.dimensions[0]),
+            int(agent_pos[1] * self.task_spec.maze.dimensions[1])
+        )
+
+        # Get carrying object
+        carrying = None
+        if state.agent.holding is not None:
+            carrying = state.agent.holding.id
+
+        # Build block positions
+        block_positions = {}
+        for obj_id, obj in state.objects.items():
+            if obj.obj_type == "movable" and obj.cell_id is not None:
+                pos = tiling.cell_to_canonical(obj.cell_id)
+                block_positions[obj_id] = (
+                    int(pos[0] * self.task_spec.maze.dimensions[0]),
+                    int(pos[1] * self.task_spec.maze.dimensions[1])
+                )
+
+        # Convert visibility sets from cell_id strings to (x,y) grid coords
+        obs_mode = getattr(state, 'observability_mode', 'full')
+        visible_xy = set()
+        explored_xy = set()
+
+        if obs_mode != "full":
+            dims = self.task_spec.maze.dimensions
+            for cell_id in state.visible_cells:
+                pos = tiling.cell_to_canonical(cell_id)
+                visible_xy.add((int(pos[0] * dims[0]), int(pos[1] * dims[1])))
+            for cell_id in state.explored_cells:
+                pos = tiling.cell_to_canonical(cell_id)
+                explored_xy.add((int(pos[0] * dims[0]), int(pos[1] * dims[1])))
+
+        return GridState(
+            agent_position=grid_pos,
+            agent_direction=state.agent.facing,
+            agent_carrying=carrying,
+            step_count=self._step_count,
+            max_steps=self._max_steps,
+            block_positions=block_positions,
+            goal_reached=state.check_goal(),
+            observability_mode=obs_mode,
+            visible_cells=visible_xy,
+            explored_cells=explored_xy,
+        )
+
+    def close(self) -> None:
+        """Clean up resources."""
+        if self.env is not None:
+            # MultiGridEnv doesn't have explicit close
+            self.env = None
+        self._configured = False
+
+    @property
+    def observation_shape(self) -> tuple[int, int, int]:
+        """Shape of observations (H, W, C)."""
+        return (64, 64, 3)
diff --git a/src/v1_1/gridworld/custom_env.py b/src/v1_1/gridworld/custom_env.py
new file mode 100644
index 00000000..e4232f6d
--- /dev/null
+++ b/src/v1_1/gridworld/custom_env.py
@@ -0,0 +1,431 @@
+"""
+Custom MiniGrid Environment
+
+A configurable MiniGrid environment that can be populated from TaskSpecification.
+Supports all mechanism types: keys, doors, switches, gates, blocks, hazards.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from typing import Optional, Any
+
+# Import from gymnasium's minigrid package (no naming conflict after rename to gridworld/)
+from minigrid.core.grid import Grid
+from minigrid.core.mission import MissionSpace
+from minigrid.core.world_object import WorldObj, Key, Door, Goal, Wall, Lava, Box, Ball
+from minigrid.minigrid_env import MiniGridEnv
+
+from .task_spec import TaskSpecification, Position
+
+
+# Color mapping for MiniGrid
+MINIGRID_COLORS = {
+    "red": "red",
+    "blue": "blue",
+    "green": "green",
+    "yellow": "yellow",
+    "purple": "purple",
+    "grey": "grey",
+    "gray": "grey",
+}
+
+
+class Switch(Ball):
+    """
+    Switch object that can control gates.
+    Rendered as a ball with special interaction behavior.
+    """
+
+    def __init__(self, color: str = "yellow", switch_id: str = "", controls: list[str] = None):
+        super().__init__(color)
+        self.switch_id = switch_id
+        self.controls = controls or []
+        self.is_active = False
+
+    def can_pickup(self):
+        return False
+
+    def toggle(self, env, pos):
+        """Toggle the switch state and update controlled gates."""
+        self.is_active = not self.is_active
+        # Gate toggling is handled by the environment
+        return True
+
+
+class Gate(Door):
+    """
+    Gate object controlled by switches.
+    When closed, blocks movement like a wall. When open, passable.
+    Extends Door for proper rendering.
+    """
+
+    def __init__(self, color: str = "grey", gate_id: str = "", is_open: bool = False):
+        # Initialize as unlocked door
+        super().__init__(color, is_locked=False)
+        self.gate_id = gate_id
+        self.is_open = is_open
+
+    def can_overlap(self):
+        return self.is_open
+
+    def see_behind(self):
+        return self.is_open
+
+    def toggle(self, env, pos):
+        # Gates can only be toggled by switches, not directly
+        return False
+
+
+class TeleporterObj(Ball):
+    """
+    Teleporter endpoint object.
+    When the agent steps on it, they are teleported to the partner endpoint.
+    Rendered as a ball with special portal appearance.
+    """
+
+    def __init__(self, color: str = "purple", teleporter_id: str = "",
+                 partner: "TeleporterObj | None" = None, cooldown_max: int = 1):
+        super().__init__(color)
+        self.teleporter_id = teleporter_id
+        self.partner: TeleporterObj | None = partner
+        self.cooldown = 0
+        self.cooldown_max = cooldown_max
+
+    def can_overlap(self):
+        return True
+
+    def can_pickup(self):
+        return False
+
+
+class PushableBlock(Box):
+    """
+    A block that can be pushed by the agent.
+    Extends Box to leverage existing rendering.
+    """
+
+    def __init__(self, color: str = "grey", block_id: str = ""):
+        super().__init__(color)
+        self.block_id = block_id
+        self.pushable = True
+
+    def can_pickup(self):
+        return False
+
+
+class CustomMiniGridEnv(MiniGridEnv):
+    """
+    Custom MiniGrid environment that can be configured from a TaskSpecification.
+
+    This environment supports:
+    - Arbitrary maze layouts
+    - Keys and colored doors
+    - Switches and gates
+    - Pushable blocks
+    - Hazards (lava)
+    - Custom goal conditions
+    """
+
+    def __init__(
+        self,
+        width: int = 8,
+        height: int = 8,
+        max_steps: int = 100,
+        agent_start_pos: Optional[tuple[int, int]] = None,
+        agent_start_dir: int = 0,
+        goal_pos: Optional[tuple[int, int]] = None,
+        mission_text: str = "Navigate to the goal",
+        render_mode: Optional[str] = None,
+        task_spec: Optional[TaskSpecification] = None,
+        see_through_walls: bool = True,
+        agent_view_size: int = 7,
+        highlight: bool = True,
+        agent_pov: bool = False,
+        **kwargs,
+    ):
+        self.agent_start_pos = agent_start_pos
+        self.agent_start_dir = agent_start_dir
+        self.goal_pos = goal_pos
+        self._custom_mission_text = mission_text  # Store our custom mission text
+        self.task_spec = task_spec
+
+        # Mechanism tracking
+        self.switches: dict[str, Switch] = {}
+        self.gates: dict[str, Gate] = {}
+        self.blocks: dict[str, PushableBlock] = {}
+        self.teleporters: dict[str, TeleporterObj] = {}
+        self.switch_gate_map: dict[str, list[str]] = {}  # switch_id -> [gate_ids]
+
+        # Fog of war tracking: set of (x, y) cells the agent has visited/seen
+        self.explored_cells: set[tuple[int, int]] = set()
+
+        # Mission space for the environment - the func returns our custom text
+        mission_space = MissionSpace(mission_func=lambda: mission_text)
+
+        super().__init__(
+            mission_space=mission_space,
+            width=width,
+            height=height,
+            max_steps=max_steps,
+            see_through_walls=see_through_walls,
+            agent_view_size=agent_view_size,
+            highlight=highlight,
+            agent_pov=agent_pov,
+            render_mode=render_mode,
+            **kwargs,
+        )
+
+        # After super().__init__, self.mission is set by the parent class
+        # We can update it to our custom text if needed
+        self.mission = mission_text
+
+    def _gen_grid(self, width: int, height: int):
+        """Generate the grid. Called by reset()."""
+        # Create empty grid
+        self.grid = Grid(width, height)
+
+        # Add border walls
+        self.grid.wall_rect(0, 0, width, height)
+
+        # Reset fog-of-war tracking
+        self.explored_cells = set()
+
+        # If we have a task spec, it will be populated after _gen_grid by the parser
+        # For now, set basic start/goal if provided
+
+        if self.agent_start_pos is not None:
+            self.agent_pos = self.agent_start_pos
+            self.agent_dir = self.agent_start_dir
+        else:
+            # Default: place agent at (1, 1)
+            self.agent_pos = (1, 1)
+            self.agent_dir = 0
+
+        if self.goal_pos is not None:
+            self.put_obj(Goal(), self.goal_pos[0], self.goal_pos[1])
+
+    def place_wall(self, x: int, y: int):
+        """Place a wall at the given position."""
+        self.grid.set(x, y, Wall())
+
+    def place_key(self, x: int, y: int, color: str):
+        """Place a key at the given position."""
+        color = MINIGRID_COLORS.get(color, color)
+        self.put_obj(Key(color), x, y)
+
+    def place_door(self, x: int, y: int, color: str, is_locked: bool = True):
+        """Place a door at the given position."""
+        color = MINIGRID_COLORS.get(color, color)
+        door = Door(color, is_locked=is_locked)
+        self.grid.set(x, y, door)
+
+    def place_switch(self, x: int, y: int, switch_id: str, controls: list[str], color: str = "yellow"):
+        """Place a switch at the given position."""
+        switch = Switch(color=color, switch_id=switch_id, controls=controls)
+        self.switches[switch_id] = switch
+        self.switch_gate_map[switch_id] = controls
+        self.put_obj(switch, x, y)
+
+    def place_gate(self, x: int, y: int, gate_id: str, is_open: bool = False, color: str = "grey"):
+        """Place a gate at the given position."""
+        gate = Gate(color=color, gate_id=gate_id, is_open=is_open)
+        self.gates[gate_id] = gate
+        self.grid.set(x, y, gate)
+
+    def place_block(self, x: int, y: int, block_id: str, color: str = "grey"):
+        """Place a pushable block at the given position."""
+        block = PushableBlock(color=color, block_id=block_id)
+        self.blocks[block_id] = block
+        self.put_obj(block, x, y)
+
+    def place_hazard(self, x: int, y: int, hazard_type: str = "lava"):
+        """Place a hazard at the given position."""
+        # All hazards use Lava for now
+        self.grid.set(x, y, Lava())
+
+    def place_teleporter(self, teleporter_id: str, x_a: int, y_a: int,
+                         x_b: int, y_b: int, bidirectional: bool = True,
+                         color: str = "purple"):
+        """Place a teleporter pair at the given positions."""
+        tp_a = TeleporterObj(color=color, teleporter_id=f"{teleporter_id}_a")
+        tp_b = TeleporterObj(color=color, teleporter_id=f"{teleporter_id}_b")
+        tp_a.partner = tp_b
+        if bidirectional:
+            tp_b.partner = tp_a
+        self.teleporters[f"{teleporter_id}_a"] = tp_a
+        self.teleporters[f"{teleporter_id}_b"] = tp_b
+        self.put_obj(tp_a, x_a, y_a)
+        self.put_obj(tp_b, x_b, y_b)
+
+    def place_goal(self, x: int, y: int):
+        """Place the goal at the given position."""
+        self.put_obj(Goal(), x, y)
+
+    def set_agent_position(self, x: int, y: int, direction: int = 0):
+        """Set the agent's starting position and direction."""
+        self.agent_pos = (x, y)
+        self.agent_dir = direction
+
+    def toggle_gate(self, gate_id: str):
+        """Toggle a gate's open/closed state."""
+        if gate_id in self.gates:
+            gate = self.gates[gate_id]
+            gate.is_open = not gate.is_open
+
+    def step(self, action: int):
+        """Execute one step in the environment with custom mechanics."""
+        # Get the position in front of the agent
+        fwd_pos = self.front_pos
+        fwd_cell = self.grid.get(*fwd_pos)
+
+        # Handle key consumption when unlocking doors
+        if action == self.actions.toggle and isinstance(fwd_cell, Door) and not isinstance(fwd_cell, Gate):
+            if fwd_cell.is_locked and self.carrying is not None:
+                if isinstance(self.carrying, Key) and self.carrying.color == fwd_cell.color:
+                    # Key matches - unlock the door
+                    fwd_cell.is_locked = False
+                    fwd_cell.is_open = True
+
+                    # Check if key should be consumed
+                    if self.task_spec and self.task_spec.rules.key_consumption:
+                        self.carrying = None  # Consume the key
+
+                    # Return after handling
+                    self.step_count += 1
+                    truncated = self.step_count >= self.max_steps
+                    obs = self.gen_obs()
+                    return obs, 0, False, truncated, {}
+
+        # Handle gate toggle attempt (gates can only be opened by switches, not directly)
+        if action == self.actions.toggle and isinstance(fwd_cell, Gate):
+            # No-op: gates are not directly toggleable
+            self.step_count += 1
+            truncated = self.step_count >= self.max_steps
+            obs = self.gen_obs()
+            return obs, 0, False, truncated, {}
+
+        # Handle switch interaction
+        if action == self.actions.toggle and isinstance(fwd_cell, Switch):
+            # Toggle the switch
+            fwd_cell.is_active = not fwd_cell.is_active
+            # Toggle all controlled gates
+            for gate_id in fwd_cell.controls:
+                self.toggle_gate(gate_id)
+            # Return after handling (don't fall through to super which would re-toggle)
+            self.step_count += 1
+            truncated = self.step_count >= self.max_steps
+            obs = self.gen_obs()
+            return obs, 0, False, truncated, {}
+
+        # Handle block pushing
+        if action == self.actions.forward and isinstance(fwd_cell, PushableBlock):
+            # Calculate position behind the block
+            dir_vec = self.dir_vec
+            behind_block_pos = (fwd_pos[0] + dir_vec[0], fwd_pos[1] + dir_vec[1])
+
+            # Check if we can push the block
+            behind_cell = self.grid.get(*behind_block_pos)
+            if behind_cell is None or behind_cell.can_overlap():
+                # Push the block
+                self.grid.set(*fwd_pos, None)
+                self.grid.set(*behind_block_pos, fwd_cell)
+                # Agent moves forward
+                self.agent_pos = fwd_pos
+
+                # Check step count and return
+                self.step_count += 1
+
+                if self.step_count >= self.max_steps:
+                    truncated = True
+                else:
+                    truncated = False
+
+                # Check if goal reached
+                terminated = False
+                reward = 0
+                if self.goal_pos and self.agent_pos == self.goal_pos:
+                    terminated = True
+                    reward = 1 - 0.9 * (self.step_count / self.max_steps)
+                elif isinstance(self.grid.get(*self.agent_pos), Goal):
+                    terminated = True
+                    reward = 1 - 0.9 * (self.step_count / self.max_steps)
+
+                obs = self.gen_obs()
+                return obs, reward, terminated, truncated, {}
+
+        # Handle gate blocking
+        if action == self.actions.forward and isinstance(fwd_cell, Gate) and not fwd_cell.is_open:
+            # Can't move through closed gate
+            self.step_count += 1
+            if self.step_count >= self.max_steps:
+                truncated = True
+            else:
+                truncated = False
+            obs = self.gen_obs()
+            return obs, 0, False, truncated, {}
+
+        # Default behavior
+        obs, reward, terminated, truncated, info = super().step(action)
+
+        # Tick teleporter cooldowns
+        for tp in self.teleporters.values():
+            if tp.cooldown > 0:
+                tp.cooldown -= 1
+
+        # Check if agent landed on a teleporter after moving forward
+        if action == self.actions.forward:
+            cell = self.grid.get(*self.agent_pos)
+            if isinstance(cell, TeleporterObj) and cell.partner is not None and cell.cooldown == 0:
+                # Find partner position
+                for x in range(self.width):
+                    for y in range(self.height):
+                        if self.grid.get(x, y) is cell.partner:
+                            self.agent_pos = (x, y)
+                            # Set cooldown on destination to prevent immediate bounce-back
+                            cell.partner.cooldown = cell.partner.cooldown_max
+                            # Regenerate observation after teleport
+                            obs = self.gen_obs()
+                            break
+                    else:
+                        continue
+                    break
+
+        return obs, reward, terminated, truncated, info
+
+    def get_mission_text(self) -> str:
+        """Return the mission text."""
+        return self._custom_mission_text
+
+    def get_visible_cells(self) -> set[tuple[int, int]]:
+        """Get the set of (x, y) cells currently visible to the agent via view cone.
+
+        Uses the same coordinate mapping as MiniGrid's get_frame highlight logic:
+        the vis_mask from gen_obs_grid is in rotated agent-relative space, and we
+        map back to absolute grid coordinates using dir_vec / right_vec.
+        """
+        _, vis_mask = self.gen_obs_grid()
+        visible = set()
+
+        # MiniGrid coordinate mapping: agent is at bottom-center of rotated view
+        f_vec = self.dir_vec
+        r_vec = np.array((-f_vec[1], f_vec[0]))
+        top_left = (
+            np.array(self.agent_pos)
+            + f_vec * (self.agent_view_size - 1)
+            - r_vec * (self.agent_view_size // 2)
+        )
+
+        for vis_i in range(self.agent_view_size):
+            for vis_j in range(self.agent_view_size):
+                if not vis_mask[vis_i, vis_j]:
+                    continue
+                abs_pos = top_left - (f_vec * vis_j) + (r_vec * vis_i)
+                abs_x, abs_y = int(abs_pos[0]), int(abs_pos[1])
+                if 0 <= abs_x < self.width and 0 <= abs_y < self.height:
+                    visible.add((abs_x, abs_y))
+        return visible
+
+    def update_explored(self):
+        """Update fog-of-war: add currently visible cells to explored set."""
+        self.explored_cells |= self.get_visible_cells()
diff --git a/src/v1_1/gridworld/demo.py b/src/v1_1/gridworld/demo.py
new file mode 100644
index 00000000..dde2c691
--- /dev/null
+++ b/src/v1_1/gridworld/demo.py
@@ -0,0 +1,480 @@
+#!/usr/bin/env python3
+"""
+MiniGrid Backend Demo
+
+Demonstrates the MiniGridBackend (gymnasium-based) for standard square grid tasks.
+Shows loading tasks, running episodes, using policies, and saving visualizations.
+
+Usage:
+    cd src/v1_1
+    python gridworld/demo.py              # Run all demos
+    python gridworld/demo.py --visual     # Save PNG images of each demo
+    python gridworld/demo.py --play       # Interactive play mode
+    python gridworld/demo.py --play --task tier2/single_key_001  # Play specific task
+"""
+
+import sys
+import argparse
+from pathlib import Path
+import numpy as np
+
+# Ensure imports work from the v1_1 directory
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from gridworld.task_spec import TaskSpecification
+from gridworld.backends import get_backend, MiniGridBackend
+from gridworld.backends.base import GridState
+from gridworld.runner.grid_runner import GridRunner
+from gridworld.actions import MiniGridActions, ACTION_NAMES
+from gridworld.envs.tier_envs import list_available_envs
+
+
+def interactive_play(task_path: str = None):
+    """
+    Interactive play mode - control the agent with keyboard.
+
+    Controls:
+        Arrow Keys: Move/Turn (Up=forward, Left/Right=turn)
+        Space: Pickup
+        D: Drop
+        T or Enter: Toggle (open door, activate switch)
+        R: Reset episode
+        Q or Escape: Quit
+    """
+    import pygame
+
+    # Default to a tier 2 task for interesting gameplay
+    if task_path is None:
+        task_path = Path(__file__).parent / "tasks" / "tier2" / "single_key_001.json"
+    else:
+        # Handle relative paths like "tier2/single_key_001"
+        if not Path(task_path).exists():
+            task_path = Path(__file__).parent / "tasks" / f"{task_path}.json"
+
+    spec = TaskSpecification.from_json(str(task_path))
+
+    print("\n" + "=" * 60)
+    print("Interactive Play Mode")
+    print("=" * 60)
+    print(f"\nTask: {spec.task_id}")
+    print(f"Description: {spec.description}")
+    print(f"\nControls:")
+    print("  Arrow Up    : Move forward")
+    print("  Arrow Left  : Turn left")
+    print("  Arrow Right : Turn right")
+    print("  Space       : Pickup")
+    print("  D           : Drop")
+    print("  T / Enter   : Toggle (doors, switches)")
+    print("  R           : Reset")
+    print("  Q / Escape  : Quit")
+    print("\n" + "-" * 60)
+
+    # Create backend with rgb_array mode (we'll display via pygame)
+    backend = get_backend("minigrid", render_mode="rgb_array")
+    backend.configure(spec)
+    obs, state, info = backend.reset(seed=42)
+
+    # Initialize pygame
+    pygame.init()
+
+    # Scale up for visibility
+    scale = 2
+    display_size = (obs.shape[1] * scale, obs.shape[0] * scale)
+    screen = pygame.display.set_mode(display_size)
+    pygame.display.set_caption(f"MiniGrid: {spec.task_id}")
+
+    # Key mapping
+    key_to_action = {
+        pygame.K_UP: MiniGridActions.MOVE_FORWARD,
+        pygame.K_LEFT: MiniGridActions.TURN_LEFT,
+        pygame.K_RIGHT: MiniGridActions.TURN_RIGHT,
+        pygame.K_SPACE: MiniGridActions.PICKUP,
+        pygame.K_d: MiniGridActions.DROP,
+        pygame.K_t: MiniGridActions.TOGGLE,
+        pygame.K_RETURN: MiniGridActions.TOGGLE,
+    }
+
+    clock = pygame.time.Clock()
+    running = True
+    step_count = 0
+
+    def render_frame():
+        # Convert numpy array to pygame surface
+        surf = pygame.surfarray.make_surface(obs.swapaxes(0, 1))
+        surf = pygame.transform.scale(surf, display_size)
+        screen.blit(surf, (0, 0))
+        pygame.display.flip()
+
+    def print_status():
+        carrying = state.agent_carrying if state.agent_carrying else "nothing"
+        print(f"  Step {step_count}: pos={state.agent_position}, carrying={carrying}")
+
+    render_frame()
+    print(f"\nStarting at {state.agent_position}")
+
+    while running:
+        for event in pygame.event.get():
+            if event.type == pygame.QUIT:
+                running = False
+            elif event.type == pygame.KEYDOWN:
+                if event.key in (pygame.K_q, pygame.K_ESCAPE):
+                    running = False
+                elif event.key == pygame.K_r:
+                    # Reset
+                    obs, state, info = backend.reset(seed=42)
+                    step_count = 0
+                    render_frame()
+                    print("\n--- Episode Reset ---")
+                    print(f"Starting at {state.agent_position}")
+                elif event.key in key_to_action:
+                    action = key_to_action[event.key]
+                    obs, reward, terminated, truncated, state, info = backend.step(action)
+                    step_count += 1
+                    render_frame()
+                    print_status()
+
+                    if terminated:
+                        print("\n*** GOAL REACHED! ***")
+                        print(f"Completed in {step_count} steps")
+                        print("Press R to reset or Q to quit")
+                    elif truncated:
+                        print("\n*** TIME LIMIT REACHED ***")
+                        print("Press R to reset or Q to quit")
+
+        clock.tick(30)
+
+    pygame.quit()
+    backend.close()
+    print("\n✓ Interactive session ended")
+
+
+def save_image(obs: np.ndarray, path: str):
+    """Save observation as PNG image."""
+    try:
+        from PIL import Image
+        img = Image.fromarray(obs)
+        img.save(path)
+        print(f"  Saved: {path}")
+    except ImportError:
+        print("  PIL not available, skipping image save")
+
+
+def demo_backend_basics(save_images: bool = False):
+    """Demonstrate basic backend usage."""
+    print("\n" + "=" * 60)
+    print("Demo 1: Backend Basics")
+    print("=" * 60)
+
+    # Load a task
+    task_path = Path(__file__).parent / "tasks" / "tier1" / "maze_simple_001.json"
+    spec = TaskSpecification.from_json(str(task_path))
+
+    print(f"\nTask: {spec.task_id}")
+    print(f"Description: {spec.description}")
+    print(f"Grid size: {spec.maze.dimensions}")
+    print(f"Start: {spec.maze.start.to_tuple()}")
+    print(f"Goal: {spec.maze.goal.to_tuple()}")
+
+    # Create backend
+    backend = get_backend("minigrid", render_mode="rgb_array")
+    backend.configure(spec)
+
+    # Reset environment
+    obs, state, info = backend.reset(seed=42)
+
+    print(f"\nInitial state:")
+    print(f"  Agent position: {state.agent_position}")
+    print(f"  Agent direction: {state.agent_direction}")
+    print(f"  Observation shape: {obs.shape}")
+    print(f"  Mission: {backend.get_mission_text()}")
+
+    # Take a few steps
+    actions = [
+        MiniGridActions.TURN_RIGHT,
+        MiniGridActions.MOVE_FORWARD,
+        MiniGridActions.MOVE_FORWARD,
+    ]
+
+    print("\nExecuting actions:")
+    for action in actions:
+        obs, reward, terminated, truncated, state, info = backend.step(action)
+        print(f"  {ACTION_NAMES[action]}: pos={state.agent_position}, reward={reward:.2f}")
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        save_image(obs, str(output_dir / "demo1_minigrid_basic.png"))
+
+    backend.close()
+    print("\n✓ Backend basics demo complete")
+
+
+def demo_key_door_puzzle(save_images: bool = False):
+    """Demonstrate a key-door puzzle (Tier 2)."""
+    print("\n" + "=" * 60)
+    print("Demo 2: Key-Door Puzzle (Tier 2)")
+    print("=" * 60)
+
+    task_path = Path(__file__).parent / "tasks" / "tier2" / "single_key_001.json"
+    spec = TaskSpecification.from_json(str(task_path))
+
+    print(f"\nTask: {spec.task_id}")
+    print(f"Description: {spec.description}")
+    print(f"Keys: {[(k.id, k.color) for k in spec.mechanisms.keys]}")
+    print(f"Doors: {[(d.id, d.requires_key) for d in spec.mechanisms.doors]}")
+
+    backend = get_backend("minigrid", render_mode="rgb_array")
+    backend.configure(spec)
+    obs, state, info = backend.reset(seed=42)
+
+    print(f"\nInitial: Agent at {state.agent_position}, carrying: {state.agent_carrying}")
+
+    # Expert solution for this puzzle
+    solution = [
+        MiniGridActions.TURN_RIGHT,   # Face down
+        MiniGridActions.MOVE_FORWARD,  # Move down
+        MiniGridActions.MOVE_FORWARD,  # Move down to key row
+        MiniGridActions.TURN_LEFT,     # Face right
+        MiniGridActions.MOVE_FORWARD,  # Move to key
+        MiniGridActions.PICKUP,        # Get key
+        MiniGridActions.MOVE_FORWARD,  # Move right
+        MiniGridActions.MOVE_FORWARD,  # Move right
+        MiniGridActions.TOGGLE,        # Unlock door
+        MiniGridActions.MOVE_FORWARD,  # Through door
+        MiniGridActions.MOVE_FORWARD,  # Continue
+        MiniGridActions.TURN_RIGHT,    # Face down
+        MiniGridActions.MOVE_FORWARD,  # Move to goal
+        MiniGridActions.MOVE_FORWARD,
+        MiniGridActions.MOVE_FORWARD,
+    ]
+
+    print("\nExecuting expert solution:")
+    for i, action in enumerate(solution):
+        obs, reward, terminated, truncated, state, info = backend.step(action)
+        status = ""
+        if state.agent_carrying:
+            status = f", carrying={state.agent_carrying}"
+        if terminated:
+            status += " [GOAL REACHED]"
+        print(f"  {i+1}. {ACTION_NAMES[action]}: pos={state.agent_position}{status}")
+
+        if terminated:
+            break
+
+    print(f"\nResult: {'SUCCESS' if terminated else 'IN PROGRESS'}")
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        save_image(obs, str(output_dir / "demo2_key_door.png"))
+
+    backend.close()
+    print("\n✓ Key-door puzzle demo complete")
+
+
+def demo_runner_evaluation(save_images: bool = False):
+    """Demonstrate using GridRunner for evaluation."""
+    print("\n" + "=" * 60)
+    print("Demo 3: GridRunner Evaluation")
+    print("=" * 60)
+
+    # Load multiple tasks
+    task_dir = Path(__file__).parent / "tasks"
+    tasks = []
+    for tier in range(1, 4):  # Tiers 1-3
+        tier_dir = task_dir / f"tier{tier}"
+        if tier_dir.exists():
+            for json_file in sorted(tier_dir.glob("*.json"))[:1]:  # First task per tier
+                tasks.append(TaskSpecification.from_json(str(json_file)))
+
+    print(f"\nLoaded {len(tasks)} tasks:")
+    for t in tasks:
+        print(f"  - {t.task_id} (Tier {t.difficulty_tier})")
+
+    # Create runner with random policy
+    runner = GridRunner(render_mode="rgb_array")
+
+    def random_policy(obs, state, mission):
+        """Simple random policy with bias toward forward movement."""
+        import random
+        weights = [0.1, 0.1, 0.5, 0.1, 0.05, 0.1, 0.05]  # Heavy forward bias
+        return random.choices(range(7), weights=weights)[0]
+
+    print("\nRunning episodes with random policy:")
+    results = []
+    for spec in tasks:
+        result = runner.run_episode(spec, policy_fn=random_policy, seed=42)
+        results.append(result)
+        status = "SUCCESS" if result.success else "FAILED"
+        print(f"  {spec.task_id}: {status} in {result.steps_taken} steps")
+
+    # Summary
+    success_rate = sum(r.success for r in results) / len(results) * 100
+    avg_steps = sum(r.steps_taken for r in results) / len(results)
+
+    print(f"\nSummary:")
+    print(f"  Success rate: {success_rate:.1f}%")
+    print(f"  Average steps: {avg_steps:.1f}")
+
+    if save_images and results:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        # Save final observation from first result
+        if results[0].trajectory:
+            final_obs = results[0].trajectory[-1].observation
+            save_image(final_obs, str(output_dir / "demo3_evaluation.png"))
+
+    runner.close()
+    print("\n✓ Runner evaluation demo complete")
+
+
+def demo_all_tiers():
+    """Show all available task tiers."""
+    print("\n" + "=" * 60)
+    print("Demo 4: Available Tasks by Tier")
+    print("=" * 60)
+
+    available = list_available_envs()
+
+    total = 0
+    for tier_name, task_ids in sorted(available.items()):
+        print(f"\n{tier_name.upper()}:")
+        for task_id in task_ids:
+            print(f"  - {task_id}")
+        total += len(task_ids)
+
+    print(f"\nTotal: {total} tasks available")
+    print("\n✓ Task listing complete")
+
+
+def demo_observation_shapes(save_images: bool = False):
+    """Show observation and render shapes."""
+    print("\n" + "=" * 60)
+    print("Demo 5: Observation & Render Shapes")
+    print("=" * 60)
+
+    task_path = Path(__file__).parent / "tasks" / "tier1" / "maze_simple_001.json"
+    spec = TaskSpecification.from_json(str(task_path))
+
+    backend = MiniGridBackend(render_mode="rgb_array")
+    backend.configure(spec)
+    obs, state, info = backend.reset(seed=42)
+
+    print(f"\nObservation from reset():")
+    print(f"  Shape: {obs.shape}")
+    print(f"  Dtype: {obs.dtype}")
+    print(f"  Range: [{obs.min()}, {obs.max()}]")
+
+    render = backend.render()
+    print(f"\nRender output:")
+    print(f"  Shape: {render.shape}")
+    print(f"  Dtype: {render.dtype}")
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        save_image(obs, str(output_dir / "demo5_observation.png"))
+        save_image(render, str(output_dir / "demo5_render.png"))
+
+    backend.close()
+    print("\n✓ Observation shapes demo complete")
+
+
+def demo_deterministic_replay():
+    """Demonstrate deterministic behavior with same seed."""
+    print("\n" + "=" * 60)
+    print("Demo 6: Deterministic Replay")
+    print("=" * 60)
+
+    task_path = Path(__file__).parent / "tasks" / "tier1" / "maze_simple_001.json"
+    spec = TaskSpecification.from_json(str(task_path))
+
+    actions = [
+        MiniGridActions.TURN_RIGHT,
+        MiniGridActions.MOVE_FORWARD,
+        MiniGridActions.MOVE_FORWARD,
+        MiniGridActions.TURN_LEFT,
+        MiniGridActions.MOVE_FORWARD,
+    ]
+
+    def run_with_seed(seed):
+        backend = get_backend("minigrid", render_mode="rgb_array")
+        backend.configure(spec)
+        obs, state, _ = backend.reset(seed=seed)
+        positions = [state.agent_position]
+
+        for action in actions:
+            obs, _, _, _, state, _ = backend.step(action)
+            positions.append(state.agent_position)
+
+        backend.close()
+        return positions
+
+    # Run twice with same seed
+    positions1 = run_with_seed(42)
+    positions2 = run_with_seed(42)
+    positions3 = run_with_seed(99)  # Different seed
+
+    print(f"\nSeed 42 (run 1): {positions1}")
+    print(f"Seed 42 (run 2): {positions2}")
+    print(f"Seed 99:         {positions3}")
+
+    print(f"\nRun 1 == Run 2: {positions1 == positions2}")
+    print(f"Run 1 == Run 3: {positions1 == positions3}")
+
+    print("\n✓ Deterministic replay demo complete")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="MiniGrid Backend Demo")
+    parser.add_argument("--visual", action="store_true", help="Save PNG images")
+    parser.add_argument("--demo", type=int, help="Run specific demo (1-6)")
+    parser.add_argument("--play", action="store_true", help="Interactive play mode")
+    parser.add_argument("--task", type=str, help="Task to play (e.g., tier2/single_key_001)")
+    args = parser.parse_args()
+
+    # Interactive play mode
+    if args.play:
+        interactive_play(args.task)
+        return
+
+    print("=" * 60)
+    print("MiniGrid Backend Demo")
+    print("=" * 60)
+    print("\nThis demo uses the MiniGridBackend (gymnasium minigrid package)")
+    print("for standard square grid tasks.")
+
+    demos = [
+        demo_backend_basics,
+        demo_key_door_puzzle,
+        demo_runner_evaluation,
+        demo_all_tiers,
+        demo_observation_shapes,
+        demo_deterministic_replay,
+    ]
+
+    if args.demo:
+        if 1 <= args.demo <= len(demos):
+            demos[args.demo - 1](save_images=args.visual)
+        else:
+            print(f"Invalid demo number. Choose 1-{len(demos)}")
+    else:
+        for demo_fn in demos:
+            if demo_fn == demo_all_tiers:
+                demo_fn()  # No save_images param
+            elif demo_fn == demo_deterministic_replay:
+                demo_fn()  # No save_images param
+            else:
+                demo_fn(save_images=args.visual)
+
+    print("\n" + "=" * 60)
+    print("MiniGrid Demo Complete!")
+    print("=" * 60)
+
+    if args.visual:
+        output_dir = Path(__file__).parent / "demo_output"
+        print(f"\nImages saved to: {output_dir}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/v1_1/gridworld/demo_output/demo1_minigrid_basic.png b/src/v1_1/gridworld/demo_output/demo1_minigrid_basic.png
new file mode 100644
index 00000000..6da9fef2
Binary files /dev/null and b/src/v1_1/gridworld/demo_output/demo1_minigrid_basic.png differ
diff --git a/src/v1_1/gridworld/demo_output/demo2_key_door.png b/src/v1_1/gridworld/demo_output/demo2_key_door.png
new file mode 100644
index 00000000..8ee45ab2
Binary files /dev/null and b/src/v1_1/gridworld/demo_output/demo2_key_door.png differ
diff --git a/src/v1_1/gridworld/demo_output/demo3_evaluation.png b/src/v1_1/gridworld/demo_output/demo3_evaluation.png
new file mode 100644
index 00000000..4afba18f
Binary files /dev/null and b/src/v1_1/gridworld/demo_output/demo3_evaluation.png differ
diff --git a/src/v1_1/gridworld/demo_output/demo5_observation.png b/src/v1_1/gridworld/demo_output/demo5_observation.png
new file mode 100644
index 00000000..213920ba
Binary files /dev/null and b/src/v1_1/gridworld/demo_output/demo5_observation.png differ
diff --git a/src/v1_1/gridworld/demo_output/demo5_render.png b/src/v1_1/gridworld/demo_output/demo5_render.png
new file mode 100644
index 00000000..213920ba
Binary files /dev/null and b/src/v1_1/gridworld/demo_output/demo5_render.png differ
diff --git a/src/v1_1/gridworld/demo_output/demo_observation.npy b/src/v1_1/gridworld/demo_output/demo_observation.npy
new file mode 100644
index 00000000..53dc03e6
Binary files /dev/null and b/src/v1_1/gridworld/demo_output/demo_observation.npy differ
diff --git a/src/v1_1/gridworld/envs/__init__.py b/src/v1_1/gridworld/envs/__init__.py
new file mode 100644
index 00000000..1aa43d72
--- /dev/null
+++ b/src/v1_1/gridworld/envs/__init__.py
@@ -0,0 +1,27 @@
+"""
+Pre-configured MiniGrid Environments by Tier
+
+Provides convenient access to environments organized by difficulty tier.
+"""
+
+from .tier_envs import (
+    get_tier1_envs,
+    get_tier2_envs,
+    get_tier3_envs,
+    get_tier4_envs,
+    get_tier5_envs,
+    get_all_envs,
+    get_env_by_id,
+    list_available_envs,
+)
+
+__all__ = [
+    "get_tier1_envs",
+    "get_tier2_envs",
+    "get_tier3_envs",
+    "get_tier4_envs",
+    "get_tier5_envs",
+    "get_all_envs",
+    "get_env_by_id",
+    "list_available_envs",
+]
diff --git a/src/v1_1/gridworld/envs/tier_envs.py b/src/v1_1/gridworld/envs/tier_envs.py
new file mode 100644
index 00000000..f707fcda
--- /dev/null
+++ b/src/v1_1/gridworld/envs/tier_envs.py
@@ -0,0 +1,262 @@
+"""
+Pre-configured Environments by Difficulty Tier
+
+Provides factory functions to create environments for each tier.
+Also supports loading standard MiniGrid environments as fallback.
+"""
+
+from pathlib import Path
+from typing import Optional, List, Dict
+import json
+import glob
+
+from ..task_spec import TaskSpecification
+from ..task_parser import TaskParser, load_task_from_file
+from ..backends.minigrid_backend import MiniGridBackend
+
+
+# Base path for task files
+TASKS_DIR = Path(__file__).parent.parent / "tasks"
+
+
+def _load_tasks_from_dir(tier_dir: Path) -> List[TaskSpecification]:
+    """Load all task specifications from a tier directory."""
+    tasks = []
+    if tier_dir.exists():
+        for json_file in sorted(tier_dir.glob("*.json")):
+            try:
+                spec = TaskSpecification.from_json(str(json_file))
+                tasks.append(spec)
+            except Exception as e:
+                print(f"Warning: Failed to load {json_file}: {e}")
+    return tasks
+
+
+def get_tier1_envs(render_mode: str = "rgb_array") -> List[tuple]:
+    """
+    Get Tier 1 (Navigation) environments.
+
+    Returns:
+        List of (task_spec, env) tuples
+    """
+    tier_dir = TASKS_DIR / "tier1"
+    tasks = _load_tasks_from_dir(tier_dir)
+
+    parser = TaskParser(render_mode=render_mode)
+    envs = []
+    for task in tasks:
+        try:
+            env = parser.parse(task)
+            envs.append((task, env))
+        except Exception as e:
+            print(f"Warning: Failed to create env for {task.task_id}: {e}")
+
+    return envs
+
+
+def get_tier2_envs(render_mode: str = "rgb_array") -> List[tuple]:
+    """
+    Get Tier 2 (Linear Dependencies - Keys/Doors) environments.
+
+    Returns:
+        List of (task_spec, env) tuples
+    """
+    tier_dir = TASKS_DIR / "tier2"
+    tasks = _load_tasks_from_dir(tier_dir)
+
+    parser = TaskParser(render_mode=render_mode)
+    envs = []
+    for task in tasks:
+        try:
+            env = parser.parse(task)
+            envs.append((task, env))
+        except Exception as e:
+            print(f"Warning: Failed to create env for {task.task_id}: {e}")
+
+    return envs
+
+
+def get_tier3_envs(render_mode: str = "rgb_array") -> List[tuple]:
+    """
+    Get Tier 3 (Multi-Mechanism - Keys/Doors/Switches/Gates) environments.
+
+    Returns:
+        List of (task_spec, env) tuples
+    """
+    tier_dir = TASKS_DIR / "tier3"
+    tasks = _load_tasks_from_dir(tier_dir)
+
+    parser = TaskParser(render_mode=render_mode)
+    envs = []
+    for task in tasks:
+        try:
+            env = parser.parse(task)
+            envs.append((task, env))
+        except Exception as e:
+            print(f"Warning: Failed to create env for {task.task_id}: {e}")
+
+    return envs
+
+
+def get_tier4_envs(render_mode: str = "rgb_array") -> List[tuple]:
+    """
+    Get Tier 4 (Irreversibility - Pushable blocks) environments.
+
+    Returns:
+        List of (task_spec, env) tuples
+    """
+    tier_dir = TASKS_DIR / "tier4"
+    tasks = _load_tasks_from_dir(tier_dir)
+
+    parser = TaskParser(render_mode=render_mode)
+    envs = []
+    for task in tasks:
+        try:
+            env = parser.parse(task)
+            envs.append((task, env))
+        except Exception as e:
+            print(f"Warning: Failed to create env for {task.task_id}: {e}")
+
+    return envs
+
+
+def get_tier5_envs(render_mode: str = "rgb_array") -> List[tuple]:
+    """
+    Get Tier 5 (Hidden Information) environments.
+
+    Returns:
+        List of (task_spec, env) tuples
+    """
+    tier_dir = TASKS_DIR / "tier5"
+    tasks = _load_tasks_from_dir(tier_dir)
+
+    parser = TaskParser(render_mode=render_mode)
+    envs = []
+    for task in tasks:
+        try:
+            env = parser.parse(task)
+            envs.append((task, env))
+        except Exception as e:
+            print(f"Warning: Failed to create env for {task.task_id}: {e}")
+
+    return envs
+
+
+def get_all_envs(render_mode: str = "rgb_array") -> Dict[str, List[tuple]]:
+    """
+    Get all environments organized by tier.
+
+    Returns:
+        Dictionary mapping tier names to lists of (task_spec, env) tuples
+    """
+    return {
+        "tier1": get_tier1_envs(render_mode),
+        "tier2": get_tier2_envs(render_mode),
+        "tier3": get_tier3_envs(render_mode),
+        "tier4": get_tier4_envs(render_mode),
+        "tier5": get_tier5_envs(render_mode),
+    }
+
+
+def get_env_by_id(
+    task_id: str,
+    render_mode: str = "rgb_array"
+) -> Optional[tuple]:
+    """
+    Get a specific environment by task ID.
+
+    Args:
+        task_id: The task ID to find
+        render_mode: Rendering mode for the environment
+
+    Returns:
+        (task_spec, env) tuple or None if not found
+    """
+    # Search all tier directories
+    for tier_num in range(1, 6):
+        tier_dir = TASKS_DIR / f"tier{tier_num}"
+        if tier_dir.exists():
+            for json_file in tier_dir.glob("*.json"):
+                try:
+                    spec = TaskSpecification.from_json(str(json_file))
+                    if spec.task_id == task_id:
+                        parser = TaskParser(render_mode=render_mode)
+                        env = parser.parse(spec)
+                        return (spec, env)
+                except Exception:
+                    continue
+
+    return None
+
+
+def list_available_envs() -> Dict[str, List[str]]:
+    """
+    List all available task IDs organized by tier.
+
+    Returns:
+        Dictionary mapping tier names to lists of task IDs
+    """
+    result = {}
+    for tier_num in range(1, 6):
+        tier_name = f"tier{tier_num}"
+        tier_dir = TASKS_DIR / tier_name
+        task_ids = []
+
+        if tier_dir.exists():
+            for json_file in sorted(tier_dir.glob("*.json")):
+                try:
+                    spec = TaskSpecification.from_json(str(json_file))
+                    task_ids.append(spec.task_id)
+                except Exception:
+                    task_ids.append(json_file.stem)
+
+        result[tier_name] = task_ids
+
+    return result
+
+
+def get_standard_minigrid_env(env_name: str, render_mode: str = "rgb_array"):
+    """
+    Get a standard MiniGrid environment by name.
+
+    This provides access to built-in MiniGrid environments as fallback.
+
+    Args:
+        env_name: Standard MiniGrid environment name (e.g., "MiniGrid-Empty-8x8-v0")
+        render_mode: Rendering mode
+
+    Returns:
+        Gymnasium environment
+    """
+    import gymnasium as gym
+    return gym.make(env_name, render_mode=render_mode)
+
+
+# Mapping of tiers to standard MiniGrid environments (as fallback)
+STANDARD_MINIGRID_ENVS = {
+    "tier1": [
+        "MiniGrid-Empty-5x5-v0",
+        "MiniGrid-Empty-8x8-v0",
+        "MiniGrid-Empty-16x16-v0",
+        "MiniGrid-FourRooms-v0",
+    ],
+    "tier2": [
+        "MiniGrid-DoorKey-5x5-v0",
+        "MiniGrid-DoorKey-8x8-v0",
+        "MiniGrid-DoorKey-16x16-v0",
+    ],
+    "tier3": [
+        "MiniGrid-LockedRoom-v0",
+        "MiniGrid-KeyCorridorS3R1-v0",
+        "MiniGrid-KeyCorridorS3R2-v0",
+        "MiniGrid-KeyCorridorS3R3-v0",
+    ],
+    "tier4": [
+        "MiniGrid-BlockedUnlockPickup-v0",
+    ],
+    "tier5": [
+        "MiniGrid-MemoryS7-v0",
+        "MiniGrid-MemoryS9-v0",
+        "MiniGrid-RedBlueDoors-8x8-v0",
+    ],
+}
diff --git a/src/v1_1/gridworld/runner/__init__.py b/src/v1_1/gridworld/runner/__init__.py
new file mode 100644
index 00000000..6d227a89
--- /dev/null
+++ b/src/v1_1/gridworld/runner/__init__.py
@@ -0,0 +1,13 @@
+"""
+Grid Runner Module
+
+Episode execution and trajectory collection for MiniGrid environments.
+"""
+
+from .grid_runner import GridRunner, EpisodeResult, Trajectory
+
+__all__ = [
+    "GridRunner",
+    "EpisodeResult",
+    "Trajectory",
+]
diff --git a/src/v1_1/gridworld/runner/grid_runner.py b/src/v1_1/gridworld/runner/grid_runner.py
new file mode 100644
index 00000000..282b38f2
--- /dev/null
+++ b/src/v1_1/gridworld/runner/grid_runner.py
@@ -0,0 +1,340 @@
+"""
+Grid Runner for Episode Execution
+
+Executes episodes in MiniGrid environments and collects trajectories
+for evaluation with VLM/VLA models.
+"""
+
+from dataclasses import dataclass, field
+from typing import Optional, Callable, Any
+from pathlib import Path
+import json
+import numpy as np
+
+from ..backends.base import AbstractGridBackend, GridState
+from ..backends.minigrid_backend import MiniGridBackend
+from ..task_spec import TaskSpecification
+from ..actions import ACTION_NAMES
+
+
+@dataclass
+class Trajectory:
+    """
+    A single step in an episode trajectory.
+    """
+    step: int
+    observation: np.ndarray  # RGB image
+    action: int
+    action_name: str
+    reward: float
+    state: GridState
+    info: dict = field(default_factory=dict)
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary (without image for serialization)."""
+        return {
+            "step": self.step,
+            "action": self.action,
+            "action_name": self.action_name,
+            "reward": self.reward,
+            "state": self.state.to_dict(),
+            "info": self.info,
+        }
+
+
+@dataclass
+class EpisodeResult:
+    """
+    Result of running an episode.
+    """
+    task_id: str
+    success: bool
+    total_reward: float
+    steps_taken: int
+    max_steps: int
+    terminated: bool
+    truncated: bool
+    trajectory: list[Trajectory]
+    final_state: GridState
+    seed: int
+    mission: str
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary for serialization."""
+        return {
+            "task_id": self.task_id,
+            "success": self.success,
+            "total_reward": self.total_reward,
+            "steps_taken": self.steps_taken,
+            "max_steps": self.max_steps,
+            "terminated": self.terminated,
+            "truncated": self.truncated,
+            "trajectory": [t.to_dict() for t in self.trajectory],
+            "final_state": self.final_state.to_dict(),
+            "seed": self.seed,
+            "mission": self.mission,
+        }
+
+    def save(self, path: str) -> None:
+        """Save episode result to JSON file."""
+        with open(path, "w") as f:
+            json.dump(self.to_dict(), f, indent=2)
+
+    @classmethod
+    def load(cls, path: str) -> "EpisodeResult":
+        """Load episode result from JSON file."""
+        with open(path, "r") as f:
+            data = json.load(f)
+        # Note: observations not included in saved trajectories
+        trajectory = [
+            Trajectory(
+                step=t["step"],
+                observation=np.zeros((64, 64, 3), dtype=np.uint8),  # Placeholder
+                action=t["action"],
+                action_name=t["action_name"],
+                reward=t["reward"],
+                state=GridState.from_dict(t["state"]),
+                info=t.get("info", {}),
+            )
+            for t in data["trajectory"]
+        ]
+        return cls(
+            task_id=data["task_id"],
+            success=data["success"],
+            total_reward=data["total_reward"],
+            steps_taken=data["steps_taken"],
+            max_steps=data["max_steps"],
+            terminated=data["terminated"],
+            truncated=data["truncated"],
+            trajectory=trajectory,
+            final_state=GridState.from_dict(data["final_state"]),
+            seed=data["seed"],
+            mission=data["mission"],
+        )
+
+
+class GridRunner:
+    """
+    Episode runner for MiniGrid environments.
+
+    Executes episodes using either:
+    - A policy function (for VLM/VLA evaluation)
+    - Random actions (for baseline)
+    - Expert demonstrations (if available)
+    """
+
+    def __init__(
+        self,
+        backend: Optional[AbstractGridBackend] = None,
+        render_mode: str = "rgb_array",
+    ):
+        """
+        Initialize the runner.
+
+        Args:
+            backend: Grid backend to use (defaults to MiniGridBackend)
+            render_mode: Rendering mode for observations
+        """
+        self.backend = backend or MiniGridBackend(render_mode=render_mode)
+        self.render_mode = render_mode
+
+    def run_episode(
+        self,
+        task_spec: TaskSpecification,
+        policy_fn: Optional[Callable[[np.ndarray, GridState, str], int]] = None,
+        seed: Optional[int] = None,
+        record_trajectory: bool = True,
+        verbose: bool = False,
+    ) -> EpisodeResult:
+        """
+        Run a single episode.
+
+        Args:
+            task_spec: Task specification defining the puzzle
+            policy_fn: Function that takes (observation, state, mission) and returns action.
+                       If None, uses random policy.
+            seed: Random seed (uses task_spec.seed if not provided)
+            record_trajectory: Whether to record full trajectory
+            verbose: Print step information
+
+        Returns:
+            EpisodeResult with episode outcomes and trajectory
+        """
+        # Configure backend
+        self.backend.configure(task_spec)
+
+        # Reset environment
+        seed = seed or task_spec.seed
+        obs, state, info = self.backend.reset(seed=seed)
+        mission = self.backend.get_mission_text()
+
+        # Initialize tracking
+        trajectory = []
+        total_reward = 0.0
+        step = 0
+        terminated = False
+        truncated = False
+
+        # Seed random number generator for deterministic random policy
+        rng = np.random.RandomState(seed)
+
+        if verbose:
+            print(f"Starting episode: {task_spec.task_id}")
+            print(f"Mission: {mission}")
+
+        while not terminated and not truncated:
+            # Get action from policy or random
+            if policy_fn is not None:
+                action = policy_fn(obs, state, mission)
+            else:
+                # Random policy with explicit seed
+                action = rng.randint(0, 7)
+
+            # Execute action
+            next_obs, reward, terminated, truncated, next_state, info = self.backend.step(action)
+            total_reward += reward
+            step += 1
+
+            if verbose:
+                action_name = ACTION_NAMES.get(action, f"action_{action}")
+                print(f"  Step {step}: {action_name} -> reward={reward:.3f}, done={terminated or truncated}")
+
+            # Record trajectory
+            if record_trajectory:
+                trajectory.append(Trajectory(
+                    step=step,
+                    observation=obs.copy(),
+                    action=action,
+                    action_name=ACTION_NAMES.get(action, f"action_{action}"),
+                    reward=reward,
+                    state=state,
+                    info=info,
+                ))
+
+            # Update for next iteration
+            obs = next_obs
+            state = next_state
+
+        # Determine success
+        success = terminated and total_reward > 0
+
+        if verbose:
+            print(f"Episode complete: success={success}, steps={step}, reward={total_reward:.3f}")
+
+        return EpisodeResult(
+            task_id=task_spec.task_id,
+            success=success,
+            total_reward=total_reward,
+            steps_taken=step,
+            max_steps=task_spec.max_steps,
+            terminated=terminated,
+            truncated=truncated,
+            trajectory=trajectory,
+            final_state=state,
+            seed=seed,
+            mission=mission,
+        )
+
+    def run_batch(
+        self,
+        task_specs: list[TaskSpecification],
+        policy_fn: Optional[Callable[[np.ndarray, GridState, str], int]] = None,
+        verbose: bool = False,
+    ) -> list[EpisodeResult]:
+        """
+        Run multiple episodes.
+
+        Args:
+            task_specs: List of task specifications
+            policy_fn: Policy function (see run_episode)
+            verbose: Print progress
+
+        Returns:
+            List of EpisodeResults
+        """
+        results = []
+        for i, spec in enumerate(task_specs):
+            if verbose:
+                print(f"\n=== Task {i+1}/{len(task_specs)}: {spec.task_id} ===")
+            result = self.run_episode(spec, policy_fn, verbose=verbose)
+            results.append(result)
+        return results
+
+    def collect_demonstrations(
+        self,
+        task_spec: TaskSpecification,
+        actions: list[int],
+        seed: Optional[int] = None,
+    ) -> EpisodeResult:
+        """
+        Execute a fixed sequence of actions to collect a demonstration.
+
+        Args:
+            task_spec: Task specification
+            actions: List of actions to execute
+            seed: Random seed
+
+        Returns:
+            EpisodeResult with the demonstration trajectory
+        """
+        def demo_policy(obs, state, mission, action_idx=[0]):
+            if action_idx[0] < len(actions):
+                action = actions[action_idx[0]]
+                action_idx[0] += 1
+                return action
+            return 6  # Wait if no more actions
+
+        return self.run_episode(task_spec, policy_fn=demo_policy, seed=seed)
+
+    def generate_observation_dataset(
+        self,
+        task_specs: list[TaskSpecification],
+        policy_fn: Optional[Callable] = None,
+        output_dir: str = "observations",
+        save_images: bool = True,
+    ) -> list[dict]:
+        """
+        Generate a dataset of observations for evaluation.
+
+        Args:
+            task_specs: List of task specifications
+            policy_fn: Policy to use (random if None)
+            output_dir: Directory to save images
+            save_images: Whether to save observation images
+
+        Returns:
+            List of observation records with metadata
+        """
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        records = []
+        for spec in task_specs:
+            result = self.run_episode(spec, policy_fn, record_trajectory=True)
+
+            for traj in result.trajectory:
+                record = {
+                    "task_id": spec.task_id,
+                    "step": traj.step,
+                    "action": traj.action,
+                    "action_name": traj.action_name,
+                    "reward": traj.reward,
+                    "mission": result.mission,
+                    "tier": spec.difficulty_tier,
+                    "agent_position": list(traj.state.agent_position),
+                    "agent_direction": traj.state.agent_direction,
+                }
+
+                if save_images:
+                    img_name = f"{spec.task_id}_step{traj.step:04d}.npy"
+                    img_path = output_path / img_name
+                    np.save(img_path, traj.observation)
+                    record["image_path"] = str(img_path)
+
+                records.append(record)
+
+        return records
+
+    def close(self):
+        """Clean up resources."""
+        self.backend.close()
diff --git a/src/v1_1/gridworld/task_parser.py b/src/v1_1/gridworld/task_parser.py
new file mode 100644
index 00000000..895edfed
--- /dev/null
+++ b/src/v1_1/gridworld/task_parser.py
@@ -0,0 +1,300 @@
+"""
+Task Parser for MiniGrid Domain
+
+Parses TaskSpecification JSON files and creates configured MiniGrid environments.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Optional, Union
+
+import gymnasium as gym
+
+from .task_spec import TaskSpecification, Position
+from .custom_env import CustomMiniGridEnv
+
+
+class TaskParser:
+    """
+    Parse TaskSpecification and create configured MiniGrid environments.
+
+    Usage:
+        parser = TaskParser()
+        env = parser.parse(task_spec)
+        # or
+        env = parser.parse_file("path/to/task.json")
+    """
+
+    def __init__(self, render_mode: Optional[str] = None):
+        """
+        Initialize the parser.
+
+        Args:
+            render_mode: Rendering mode for created environments ("human", "rgb_array", None)
+        """
+        self.render_mode = render_mode
+
+    def parse(self, spec: TaskSpecification, seed: Optional[int] = None) -> CustomMiniGridEnv:
+        """
+        Create a configured MiniGrid environment from a TaskSpecification.
+
+        This is the core parsing method that transforms a declarative JSON-based
+        TaskSpecification into a fully configured, runnable MiniGrid environment.
+
+        The parsing process follows three stages:
+        1. Validation: Ensures the spec is internally consistent (bounds checking,
+           dependency validation, etc.)
+        2. Environment Creation: Instantiates a CustomMiniGridEnv with basic parameters
+           and calls reset() to initialize the grid with border walls
+        3. Grid Population: Adds all task-specific elements (walls, keys, doors,
+           switches, gates, blocks, hazards) to the grid
+
+        Note on reset behavior: The environment's reset() method is called internally
+        to initialize the grid structure. The parser then populates the grid with
+        task-specific objects. This two-phase approach ensures proper initialization
+        order while avoiding state corruption.
+
+        Args:
+            spec: The task specification to parse. Must contain valid maze dimensions,
+                  start/goal positions, and mechanism definitions.
+            seed: Optional seed override for environment initialization. If None,
+                  uses spec.seed. This enables running the same task with different
+                  random seeds for evaluation.
+
+        Returns:
+            Configured CustomMiniGridEnv ready for use. The environment is already
+            reset and populated with all objects from the specification.
+
+        Raises:
+            ValueError: If the task specification fails validation. Error message
+                        includes all validation failures concatenated.
+        """
+        # Validate specification to catch errors early
+        # This checks bounds, dependency consistency (e.g., doors have matching keys),
+        # and other constraints defined in TaskSpecification.validate()
+        is_valid, errors = spec.validate()
+        if not is_valid:
+            raise ValueError(f"Invalid task specification: {'; '.join(errors)}")
+
+        width, height = spec.maze.dimensions
+
+        # Use provided seed or fall back to spec seed
+        # This allows the same task to be evaluated with different random seeds
+        actual_seed = seed if seed is not None else spec.seed
+
+        # Determine observability settings from spec
+        obs_mode = spec.rules.observability
+        if obs_mode == "full":
+            see_through_walls = True
+            agent_view_size = 7
+            agent_pov = False
+        elif obs_mode == "view_cone":
+            see_through_walls = False
+            agent_view_size = spec.rules.view_size
+            agent_pov = False  # Still render full grid with highlights
+        elif obs_mode == "fog_of_war":
+            # Fog of war uses view cone mechanics for current visibility,
+            # but tracks explored cells across the episode
+            see_through_walls = False
+            agent_view_size = spec.rules.view_size
+            agent_pov = False
+        else:
+            see_through_walls = True
+            agent_view_size = 7
+            agent_pov = False
+
+        # Create the base environment with core parameters
+        # The CustomMiniGridEnv is initialized but not yet populated with task objects
+        env = CustomMiniGridEnv(
+            width=width,
+            height=height,
+            max_steps=spec.max_steps,
+            agent_start_pos=spec.maze.start.to_tuple(),
+            agent_start_dir=0,  # Default facing right (standard MiniGrid convention)
+            goal_pos=spec.maze.goal.to_tuple(),
+            mission_text=spec.get_mission_text(),
+            render_mode=self.render_mode,
+            task_spec=spec,
+            see_through_walls=see_through_walls,
+            agent_view_size=agent_view_size,
+            agent_pov=agent_pov,
+        )
+
+        # Reset to initialize the grid structure
+        # CRITICAL: This call initializes the grid with border walls and sets up
+        # the base environment state. We MUST call reset() before populate_grid()
+        # to ensure the grid exists and is properly initialized.
+        env.reset(seed=actual_seed)
+
+        # Now populate the grid with task-specific elements
+        # This adds all interactive objects (keys, doors, switches, etc.) to the grid
+        # The order of placement matters for certain objects (e.g., gates before switches)
+        self._populate_grid(env, spec)
+
+        # Initialize fog-of-war by marking initial visible cells as explored
+        if obs_mode in ("view_cone", "fog_of_war"):
+            env.update_explored()
+
+        return env
+
+    def parse_file(self, path: Union[str, Path]) -> CustomMiniGridEnv:
+        """
+        Create a configured MiniGrid environment from a JSON file.
+
+        Args:
+            path: Path to the JSON task specification file
+
+        Returns:
+            Configured CustomMiniGridEnv ready for use
+        """
+        spec = TaskSpecification.from_json(str(path))
+        return self.parse(spec)
+
+    def parse_dict(self, data: dict) -> CustomMiniGridEnv:
+        """
+        Create a configured MiniGrid environment from a dictionary.
+
+        Args:
+            data: Dictionary containing task specification
+
+        Returns:
+            Configured CustomMiniGridEnv ready for use
+        """
+        spec = TaskSpecification.from_dict(data)
+        return self.parse(spec)
+
+    def _populate_grid(self, env: CustomMiniGridEnv, spec: TaskSpecification):
+        """
+        Populate the environment grid with walls and mechanisms.
+
+        This method is called after environment reset to add all task-specific
+        elements to the grid. The placement order is carefully designed to handle
+        dependencies between objects and ensure proper initialization.
+
+        Placement Strategy:
+        1. Clear interior cells (preserves border walls from reset)
+        2. Add static elements: walls, goal
+        3. Add collectible items: keys
+        4. Add barriers: doors
+        5. Add control mechanisms: gates first (so switches can reference them),
+           then switches
+        6. Add movable objects: blocks
+        7. Add hazards: lava/pits/spikes
+        8. Finalize: Set agent position (overwrites any objects at start)
+
+        Design Rationale:
+        - Gates before switches: Switches store references to gates, so gates
+          must exist in env.gates dict before switch placement
+        - Agent position last: Ensures the agent always starts at the correct
+          position even if other objects were accidentally placed there
+        - Border walls preserved: The 1-pixel border is created by reset() and
+          should never be modified
+
+        Args:
+            env: The CustomMiniGridEnv to populate (must already be reset)
+            spec: The task specification containing all object definitions
+        """
+        # Clear existing grid (except border walls)
+        # Border walls at x=0, x=width-1, y=0, y=height-1 are preserved
+        width, height = spec.maze.dimensions
+        for x in range(1, width - 1):
+            for y in range(1, height - 1):
+                env.grid.set(x, y, None)
+
+        # Place interior walls
+        # Border positions are skipped since reset() already placed walls there
+        for wall_pos in spec.maze.walls:
+            x, y = wall_pos.x, wall_pos.y
+            # Skip border positions (already have walls from reset)
+            if 0 < x < width - 1 and 0 < y < height - 1:
+                env.place_wall(x, y)
+
+        # Place goal marker
+        # The goal position is typically the win condition for navigation tasks
+        env.place_goal(spec.maze.goal.x, spec.maze.goal.y)
+
+        # Place keys
+        # Keys are collectible items that can unlock doors of matching color
+        for key in spec.mechanisms.keys:
+            env.place_key(key.position.x, key.position.y, key.color)
+
+        # Place doors
+        # Doors can be locked (requiring a matching key) or initially open
+        for door in spec.mechanisms.doors:
+            is_locked = door.initial_state == "locked"
+            env.place_door(door.position.x, door.position.y, door.requires_key, is_locked)
+
+        # Place gates BEFORE switches
+        # CRITICAL: Gates must be registered in env.gates before switches are placed,
+        # because switches store references to gate IDs and need to validate them
+        for gate in spec.mechanisms.gates:
+            is_open = gate.initial_state == "open"
+            env.place_gate(gate.position.x, gate.position.y, gate.id, is_open)
+
+        # Place switches
+        # Switches control gates. When toggled, they change the state of all
+        # gates in their controls list
+        for switch in spec.mechanisms.switches:
+            env.place_switch(
+                switch.position.x,
+                switch.position.y,
+                switch.id,
+                switch.controls,  # List of gate IDs this switch controls
+            )
+
+        # Place blocks
+        # Blocks are pushable objects (Sokoban-style) that can be moved by the agent
+        for block in spec.mechanisms.blocks:
+            env.place_block(block.position.x, block.position.y, block.id, block.color)
+
+        # Place hazards
+        # Hazards (lava, pits, spikes) typically end the episode if touched
+        for hazard in spec.mechanisms.hazards:
+            env.place_hazard(hazard.position.x, hazard.position.y, hazard.hazard_type)
+
+        # Place teleporters
+        # Teleporters come in pairs (A, B). Stepping on A teleports agent to B (and vice versa if bidirectional)
+        for teleporter in spec.mechanisms.teleporters:
+            env.place_teleporter(
+                teleporter.id,
+                teleporter.position_a.x, teleporter.position_a.y,
+                teleporter.position_b.x, teleporter.position_b.y,
+                teleporter.bidirectional,
+            )
+
+        # Set agent position (overwrite anything at start position)
+        # This is done last to ensure the agent always spawns at the correct location,
+        # even if the task specification accidentally placed another object there
+        env.set_agent_position(spec.maze.start.x, spec.maze.start.y)
+
+
+def load_task_from_file(path: Union[str, Path], render_mode: Optional[str] = None) -> CustomMiniGridEnv:
+    """
+    Convenience function to load a task from a JSON file.
+
+    Args:
+        path: Path to the JSON task specification file
+        render_mode: Rendering mode for the environment
+
+    Returns:
+        Configured CustomMiniGridEnv ready for use
+    """
+    parser = TaskParser(render_mode=render_mode)
+    return parser.parse_file(path)
+
+
+def load_task_from_dict(data: dict, render_mode: Optional[str] = None) -> CustomMiniGridEnv:
+    """
+    Convenience function to load a task from a dictionary.
+
+    Args:
+        data: Dictionary containing task specification
+        render_mode: Rendering mode for the environment
+
+    Returns:
+        Configured CustomMiniGridEnv ready for use
+    """
+    parser = TaskParser(render_mode=render_mode)
+    return parser.parse_dict(data)
diff --git a/src/v1_1/gridworld/task_spec.py b/src/v1_1/gridworld/task_spec.py
new file mode 100644
index 00000000..26ee70b7
--- /dev/null
+++ b/src/v1_1/gridworld/task_spec.py
@@ -0,0 +1,475 @@
+"""
+Task Specification Schema for MiniGrid Domain
+
+Defines the complete JSON schema for gridworld puzzles, matching the PDF specification.
+Supports tiers 1-5: Navigation, Linear Dependencies, Multi-Mechanism, Irreversibility, Hidden Info.
+"""
+
+from dataclasses import dataclass, field
+from typing import Literal, Optional, Any
+import json
+
+
+@dataclass
+class Position:
+    """2D grid position."""
+    x: int
+    y: int
+
+    def to_tuple(self) -> tuple[int, int]:
+        return (self.x, self.y)
+
+    @classmethod
+    def from_list(cls, coords: list[int]) -> "Position":
+        return cls(x=coords[0], y=coords[1])
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "Position":
+        return cls(x=d["x"], y=d["y"])
+
+
+@dataclass
+class KeySpec:
+    """Key object specification."""
+    id: str
+    position: Position
+    color: str  # "red", "blue", "green", "yellow", "purple", "grey"
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "KeySpec":
+        return cls(
+            id=d["id"],
+            position=Position.from_list(d["position"]) if isinstance(d["position"], list) else Position.from_dict(d["position"]),
+            color=d["color"]
+        )
+
+
+@dataclass
+class DoorSpec:
+    """Door object specification."""
+    id: str
+    position: Position
+    requires_key: str  # color that unlocks this door
+    initial_state: Literal["locked", "open"] = "locked"
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "DoorSpec":
+        return cls(
+            id=d["id"],
+            position=Position.from_list(d["position"]) if isinstance(d["position"], list) else Position.from_dict(d["position"]),
+            requires_key=d["requires_key"],
+            initial_state=d.get("initial_state", "locked")
+        )
+
+
+@dataclass
+class SwitchSpec:
+    """Switch/button specification for controlling gates."""
+    id: str
+    position: Position
+    controls: list[str]  # list of gate IDs this switch controls
+    switch_type: Literal["toggle", "hold", "one_shot"] = "toggle"
+    initial_state: Literal["on", "off"] = "off"
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "SwitchSpec":
+        return cls(
+            id=d["id"],
+            position=Position.from_list(d["position"]) if isinstance(d["position"], list) else Position.from_dict(d["position"]),
+            controls=d["controls"],
+            switch_type=d.get("switch_type", "toggle"),
+            initial_state=d.get("initial_state", "off")
+        )
+
+
+@dataclass
+class GateSpec:
+    """Gate specification (controlled by switches)."""
+    id: str
+    position: Position
+    initial_state: Literal["open", "closed"] = "closed"
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "GateSpec":
+        return cls(
+            id=d["id"],
+            position=Position.from_list(d["position"]) if isinstance(d["position"], list) else Position.from_dict(d["position"]),
+            initial_state=d.get("initial_state", "closed")
+        )
+
+
+@dataclass
+class BlockSpec:
+    """Pushable block specification (for Sokoban-style puzzles)."""
+    id: str
+    position: Position
+    pushable: bool = True
+    color: str = "grey"
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "BlockSpec":
+        return cls(
+            id=d["id"],
+            position=Position.from_list(d["position"]) if isinstance(d["position"], list) else Position.from_dict(d["position"]),
+            pushable=d.get("pushable", True),
+            color=d.get("color", "grey")
+        )
+
+
+@dataclass
+class TeleporterSpec:
+    """Teleporter pair specification."""
+    id: str
+    position_a: Position
+    position_b: Position
+    bidirectional: bool = True
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "TeleporterSpec":
+        return cls(
+            id=d["id"],
+            position_a=Position.from_list(d["position_a"]) if isinstance(d["position_a"], list) else Position.from_dict(d["position_a"]),
+            position_b=Position.from_list(d["position_b"]) if isinstance(d["position_b"], list) else Position.from_dict(d["position_b"]),
+            bidirectional=d.get("bidirectional", True)
+        )
+
+
+@dataclass
+class HazardSpec:
+    """Hazard/lava specification."""
+    id: str
+    position: Position
+    hazard_type: Literal["lava", "pit", "spike"] = "lava"
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "HazardSpec":
+        return cls(
+            id=d["id"],
+            position=Position.from_list(d["position"]) if isinstance(d["position"], list) else Position.from_dict(d["position"]),
+            hazard_type=d.get("hazard_type", "lava")
+        )
+
+
+@dataclass
+class MazeLayout:
+    """Maze geometry and structure."""
+    dimensions: tuple[int, int]  # (width, height)
+    walls: list[Position]
+    start: Position
+    goal: Position
+    floor: Optional[list[Position]] = None  # If not specified, all non-wall cells are floor
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "MazeLayout":
+        dims = tuple(d["dimensions"])
+        walls = [Position.from_list(w) if isinstance(w, list) else Position.from_dict(w) for w in d.get("walls", [])]
+        start = Position.from_list(d["start"]) if isinstance(d["start"], list) else Position.from_dict(d["start"])
+        goal = Position.from_list(d["goal"]) if isinstance(d["goal"], list) else Position.from_dict(d["goal"])
+        floor = None
+        if "floor" in d and d["floor"]:
+            floor = [Position.from_list(f) if isinstance(f, list) else Position.from_dict(f) for f in d["floor"]]
+        return cls(dimensions=dims, walls=walls, start=start, goal=goal, floor=floor)
+
+
+@dataclass
+class MechanismSet:
+    """Collection of all interactive mechanisms in the puzzle."""
+    keys: list[KeySpec] = field(default_factory=list)
+    doors: list[DoorSpec] = field(default_factory=list)
+    switches: list[SwitchSpec] = field(default_factory=list)
+    gates: list[GateSpec] = field(default_factory=list)
+    blocks: list[BlockSpec] = field(default_factory=list)
+    teleporters: list[TeleporterSpec] = field(default_factory=list)
+    hazards: list[HazardSpec] = field(default_factory=list)
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "MechanismSet":
+        return cls(
+            keys=[KeySpec.from_dict(k) for k in d.get("keys", [])],
+            doors=[DoorSpec.from_dict(door) for door in d.get("doors", [])],
+            switches=[SwitchSpec.from_dict(s) for s in d.get("switches", [])],
+            gates=[GateSpec.from_dict(g) for g in d.get("gates", [])],
+            blocks=[BlockSpec.from_dict(b) for b in d.get("blocks", [])],
+            teleporters=[TeleporterSpec.from_dict(t) for t in d.get("teleporters", [])],
+            hazards=[HazardSpec.from_dict(h) for h in d.get("hazards", [])],
+        )
+
+
+@dataclass
+class Rules:
+    """Puzzle rule configuration."""
+    key_consumption: bool = True  # Keys are consumed when used
+    switch_type: Literal["toggle", "hold", "one_shot"] = "toggle"  # Default switch behavior
+    hidden_mechanisms: list[str] = field(default_factory=list)  # IDs of mechanisms not visible initially
+    observability: Literal["full", "view_cone", "fog_of_war"] = "full"
+    view_size: int = 7  # Agent view cone size (must be odd, >= 3). Only used when observability != "full"
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "Rules":
+        return cls(
+            key_consumption=d.get("key_consumption", True),
+            switch_type=d.get("switch_type", "toggle"),
+            hidden_mechanisms=d.get("hidden_mechanisms", []),
+            observability=d.get("observability", "full"),
+            view_size=d.get("view_size", 7),
+        )
+
+
+@dataclass
+class GoalSpec:
+    """Goal/win condition specification."""
+    goal_type: Literal["reach_position", "collect_all", "push_block_to", "survive_steps"] = "reach_position"
+    target: Optional[Position] = None  # For reach_position
+    target_ids: list[str] = field(default_factory=list)  # For collect_all or push_block_to
+    target_positions: list[Position] = field(default_factory=list)  # For push_block_to
+    auxiliary_conditions: list[str] = field(default_factory=list)  # Additional requirements
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "GoalSpec":
+        target = None
+        if "target" in d and d["target"]:
+            target = Position.from_list(d["target"]) if isinstance(d["target"], list) else Position.from_dict(d["target"])
+        target_positions = []
+        if "target_positions" in d:
+            target_positions = [
+                Position.from_list(p) if isinstance(p, list) else Position.from_dict(p)
+                for p in d["target_positions"]
+            ]
+        return cls(
+            goal_type=d.get("type", d.get("goal_type", "reach_position")),
+            target=target,
+            target_ids=d.get("target_ids", []),
+            target_positions=target_positions,
+            auxiliary_conditions=d.get("auxiliary_conditions", [])
+        )
+
+
+@dataclass
+class TaskSpecification:
+    """Complete task specification for a gridworld puzzle."""
+    task_id: str
+    seed: int
+    difficulty_tier: int  # 1-5
+    maze: MazeLayout
+    mechanisms: MechanismSet
+    rules: Rules
+    goal: GoalSpec
+    max_steps: int
+    version: str = "1.0"
+    description: str = ""  # Human-readable task description
+
+    @classmethod
+    def from_dict(cls, d: dict) -> "TaskSpecification":
+        """Parse from dictionary (e.g., loaded JSON)."""
+        # Handle nested TaskSpecification key if present
+        if "TaskSpecification" in d:
+            d = d["TaskSpecification"]
+
+        # Parse maze layout
+        maze_data = d.get("maze", {})
+        if "layout" in maze_data:
+            # Nested layout format from PDF spec
+            layout = maze_data["layout"]
+            maze_layout = MazeLayout(
+                dimensions=tuple(maze_data["dimensions"]),
+                walls=[Position.from_list(w) if isinstance(w, list) else Position.from_dict(w) for w in layout.get("walls", [])],
+                start=Position.from_list(layout["start"]) if isinstance(layout["start"], list) else Position.from_dict(layout["start"]),
+                goal=Position.from_list(layout["goal"]) if isinstance(layout["goal"], list) else Position.from_dict(layout["goal"]),
+                floor=[Position.from_list(f) if isinstance(f, list) else Position.from_dict(f) for f in layout.get("floor", [])] if layout.get("floor") else None
+            )
+            # Mechanisms may be under maze
+            mechanisms_data = maze_data.get("mechanisms", d.get("mechanisms", {}))
+        else:
+            # Flat format
+            maze_layout = MazeLayout.from_dict(maze_data) if maze_data else MazeLayout(
+                dimensions=(8, 8),
+                walls=[],
+                start=Position(1, 1),
+                goal=Position(6, 6)
+            )
+            mechanisms_data = d.get("mechanisms", {})
+
+        mechanisms = MechanismSet.from_dict(mechanisms_data)
+        rules = Rules.from_dict(d.get("rules", {}))
+        goal = GoalSpec.from_dict(d.get("goal", {}))
+
+        return cls(
+            task_id=d.get("task_id", "unknown"),
+            seed=d.get("seed", 42),
+            difficulty_tier=d.get("difficulty_tier", 1),
+            maze=maze_layout,
+            mechanisms=mechanisms,
+            rules=rules,
+            goal=goal,
+            max_steps=d.get("max_steps", 100),
+            version=d.get("version", "1.0"),
+            description=d.get("description", "")
+        )
+
+    @classmethod
+    def from_json(cls, path: str) -> "TaskSpecification":
+        """Load task specification from JSON file."""
+        with open(path, "r") as f:
+            data = json.load(f)
+        return cls.from_dict(data)
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary for JSON serialization."""
+        def pos_to_list(p: Position) -> list[int]:
+            return [p.x, p.y]
+
+        return {
+            "task_id": self.task_id,
+            "version": self.version,
+            "seed": self.seed,
+            "difficulty_tier": self.difficulty_tier,
+            "description": self.description,
+            "maze": {
+                "dimensions": list(self.maze.dimensions),
+                "walls": [pos_to_list(w) for w in self.maze.walls],
+                "start": pos_to_list(self.maze.start),
+                "goal": pos_to_list(self.maze.goal),
+                "floor": [pos_to_list(f) for f in self.maze.floor] if self.maze.floor else None
+            },
+            "mechanisms": {
+                "keys": [{"id": k.id, "position": pos_to_list(k.position), "color": k.color} for k in self.mechanisms.keys],
+                "doors": [{"id": d.id, "position": pos_to_list(d.position), "requires_key": d.requires_key, "initial_state": d.initial_state} for d in self.mechanisms.doors],
+                "switches": [{"id": s.id, "position": pos_to_list(s.position), "controls": s.controls, "switch_type": s.switch_type, "initial_state": s.initial_state} for s in self.mechanisms.switches],
+                "gates": [{"id": g.id, "position": pos_to_list(g.position), "initial_state": g.initial_state} for g in self.mechanisms.gates],
+                "blocks": [{"id": b.id, "position": pos_to_list(b.position), "pushable": b.pushable, "color": b.color} for b in self.mechanisms.blocks],
+                "teleporters": [{"id": t.id, "position_a": pos_to_list(t.position_a), "position_b": pos_to_list(t.position_b), "bidirectional": t.bidirectional} for t in self.mechanisms.teleporters],
+                "hazards": [{"id": h.id, "position": pos_to_list(h.position), "hazard_type": h.hazard_type} for h in self.mechanisms.hazards],
+            },
+            "rules": {
+                "key_consumption": self.rules.key_consumption,
+                "switch_type": self.rules.switch_type,
+                "hidden_mechanisms": self.rules.hidden_mechanisms,
+                "observability": self.rules.observability,
+                "view_size": self.rules.view_size,
+            },
+            "goal": {
+                "type": self.goal.goal_type,
+                "target": pos_to_list(self.goal.target) if self.goal.target else None,
+                "target_ids": self.goal.target_ids,
+                "target_positions": [pos_to_list(p) for p in self.goal.target_positions],
+                "auxiliary_conditions": self.goal.auxiliary_conditions
+            },
+            "max_steps": self.max_steps
+        }
+
+    def to_json(self, path: str) -> None:
+        """Save task specification to JSON file."""
+        with open(path, "w") as f:
+            json.dump(self.to_dict(), f, indent=2)
+
+    def validate(self) -> tuple[bool, list[str]]:
+        """
+        Validate the task specification for consistency.
+
+        Returns:
+            (is_valid, list of error messages)
+        """
+        errors = []
+        width, height = self.maze.dimensions
+
+        # Check dimensions
+        if width < 3 or height < 3:
+            errors.append(f"Maze dimensions too small: {width}x{height}, minimum is 3x3")
+
+        # Check start position
+        if not (0 <= self.maze.start.x < width and 0 <= self.maze.start.y < height):
+            errors.append(f"Start position {self.maze.start.to_tuple()} out of bounds")
+
+        # Check goal position
+        if not (0 <= self.maze.goal.x < width and 0 <= self.maze.goal.y < height):
+            errors.append(f"Goal position {self.maze.goal.to_tuple()} out of bounds")
+
+        # Check that start and goal are not walls
+        wall_positions = {w.to_tuple() for w in self.maze.walls}
+        if self.maze.start.to_tuple() in wall_positions:
+            errors.append("Start position is a wall")
+        if self.maze.goal.to_tuple() in wall_positions:
+            errors.append("Goal position is a wall")
+
+        # Check all mechanism positions are in bounds and not walls
+        def check_position(pos: Position, name: str):
+            if not (0 <= pos.x < width and 0 <= pos.y < height):
+                errors.append(f"{name} position {pos.to_tuple()} out of bounds")
+            elif pos.to_tuple() in wall_positions:
+                errors.append(f"{name} position {pos.to_tuple()} is a wall")
+
+        for key in self.mechanisms.keys:
+            check_position(key.position, f"Key {key.id}")
+
+        for door in self.mechanisms.doors:
+            check_position(door.position, f"Door {door.id}")
+
+        for switch in self.mechanisms.switches:
+            check_position(switch.position, f"Switch {switch.id}")
+
+        for gate in self.mechanisms.gates:
+            check_position(gate.position, f"Gate {gate.id}")
+
+        for block in self.mechanisms.blocks:
+            check_position(block.position, f"Block {block.id}")
+
+        for hazard in self.mechanisms.hazards:
+            check_position(hazard.position, f"Hazard {hazard.id}")
+
+        for teleporter in self.mechanisms.teleporters:
+            check_position(teleporter.position_a, f"Teleporter {teleporter.id} endpoint A")
+            check_position(teleporter.position_b, f"Teleporter {teleporter.id} endpoint B")
+
+        # Check door-key color consistency
+        key_colors = {k.color for k in self.mechanisms.keys}
+        for door in self.mechanisms.doors:
+            if door.requires_key not in key_colors:
+                errors.append(f"Door {door.id} requires color '{door.requires_key}' but no key of that color exists")
+
+        # Check switch-gate consistency
+        gate_ids = {g.id for g in self.mechanisms.gates}
+        for switch in self.mechanisms.switches:
+            for controlled_id in switch.controls:
+                if controlled_id not in gate_ids:
+                    errors.append(f"Switch {switch.id} controls non-existent gate '{controlled_id}'")
+
+        # Check difficulty tier
+        if not 1 <= self.difficulty_tier <= 5:
+            errors.append(f"Invalid difficulty tier: {self.difficulty_tier}, must be 1-5")
+
+        # Check max_steps
+        if self.max_steps < 1:
+            errors.append(f"Invalid max_steps: {self.max_steps}, must be positive")
+
+        return len(errors) == 0, errors
+
+    def get_mission_text(self) -> str:
+        """Generate a human-readable mission description."""
+        if self.description:
+            return self.description
+
+        parts = []
+
+        # Goal description
+        if self.goal.goal_type == "reach_position":
+            parts.append("Navigate to the goal")
+        elif self.goal.goal_type == "collect_all":
+            parts.append("Collect all required items")
+        elif self.goal.goal_type == "push_block_to":
+            parts.append("Push the block to the target position")
+        elif self.goal.goal_type == "survive_steps":
+            parts.append(f"Survive for {self.max_steps} steps")
+
+        # Mechanism hints
+        if self.mechanisms.keys:
+            parts.append(f"Keys: {len(self.mechanisms.keys)}")
+        if self.mechanisms.doors:
+            parts.append(f"Locked doors: {len(self.mechanisms.doors)}")
+        if self.mechanisms.switches:
+            parts.append(f"Switches: {len(self.mechanisms.switches)}")
+        if self.mechanisms.blocks:
+            parts.append(f"Pushable blocks: {len(self.mechanisms.blocks)}")
+        if self.mechanisms.hazards:
+            parts.append("Avoid hazards")
+
+        return ". ".join(parts) + "."
diff --git a/src/v1_1/gridworld/task_validator.py b/src/v1_1/gridworld/task_validator.py
new file mode 100644
index 00000000..6eedbb1b
--- /dev/null
+++ b/src/v1_1/gridworld/task_validator.py
@@ -0,0 +1,434 @@
+"""
+Task Validator - Beatable Path Checker
+
+Uses BFS to verify that a task specification has at least one valid
+solution path from start to goal, considering mechanism dependencies
+(keys -> doors, switches -> gates, block pushes).
+
+State space: (agent_pos, agent_dir, frozenset(inventory), frozenset(active_switches),
+              frozenset(open_gates), frozenset(block_positions))
+"""
+
+from __future__ import annotations
+
+from collections import deque
+from dataclasses import dataclass
+from typing import Optional
+
+from .task_spec import TaskSpecification, Position
+
+
+@dataclass(frozen=True)
+class ValidatorState:
+    """Immutable state for BFS search."""
+    agent_pos: tuple[int, int]
+    inventory: frozenset  # set of key colors held
+    active_switches: frozenset  # set of switch ids that are on
+    open_gates: frozenset  # set of gate ids that are open
+    open_doors: frozenset  # set of door ids that are open
+    block_positions: frozenset  # frozenset of (block_id, x, y) tuples
+
+
+class TaskValidator:
+    """
+    Validates that a task is beatable by exhaustive BFS.
+
+    Checks:
+    1. Goal is reachable from start
+    2. All mechanism dependencies are satisfiable
+    3. Block push constraints don't create deadlocks on the solution path
+
+    Note: This explores state space ignoring agent direction since the agent
+    can always turn in place. We only need to check reachability in the
+    grid graph with mechanism state transitions.
+    """
+
+    def __init__(self, spec: TaskSpecification):
+        self.spec = spec
+        self.width, self.height = spec.maze.dimensions
+
+        # Build wall set for fast lookup
+        self.walls: set[tuple[int, int]] = set()
+        for wall in spec.maze.walls:
+            self.walls.add((wall.x, wall.y))
+        # Border walls
+        for x in range(self.width):
+            self.walls.add((x, 0))
+            self.walls.add((x, self.height - 1))
+        for y in range(self.height):
+            self.walls.add((0, y))
+            self.walls.add((self.width - 1, y))
+
+        # Build mechanism lookups
+        self.doors: dict[tuple[int, int], dict] = {}
+        for door in spec.mechanisms.doors:
+            self.doors[(door.position.x, door.position.y)] = {
+                "id": door.id,
+                "color": door.requires_key,
+                "locked": door.initial_state == "locked",
+            }
+
+        self.gates: dict[tuple[int, int], str] = {}
+        for gate in spec.mechanisms.gates:
+            self.gates[(gate.position.x, gate.position.y)] = gate.id
+
+        self.gate_initial_open: set[str] = set()
+        for gate in spec.mechanisms.gates:
+            if gate.initial_state == "open":
+                self.gate_initial_open.add(gate.id)
+
+        self.switches: dict[tuple[int, int], dict] = {}
+        for switch in spec.mechanisms.switches:
+            self.switches[(switch.position.x, switch.position.y)] = {
+                "id": switch.id,
+                "controls": switch.controls,
+            }
+
+        self.keys: dict[tuple[int, int], str] = {}
+        for key in spec.mechanisms.keys:
+            self.keys[(key.position.x, key.position.y)] = key.color
+
+        self.blocks: dict[tuple[int, int], str] = {}
+        for block in spec.mechanisms.blocks:
+            self.blocks[(block.position.x, block.position.y)] = block.id
+
+        self.hazards: set[tuple[int, int]] = set()
+        for hazard in spec.mechanisms.hazards:
+            self.hazards.add((hazard.position.x, hazard.position.y))
+
+        self.teleporter_map: dict[tuple[int, int], tuple[int, int]] = {}
+        for tp in spec.mechanisms.teleporters:
+            a = (tp.position_a.x, tp.position_a.y)
+            b = (tp.position_b.x, tp.position_b.y)
+            self.teleporter_map[a] = b
+            if tp.bidirectional:
+                self.teleporter_map[b] = a
+
+        self.goal = (spec.maze.goal.x, spec.maze.goal.y)
+        self.start = (spec.maze.start.x, spec.maze.start.y)
+        self.key_consumption = spec.rules.key_consumption
+
+    def validate(self, max_states: int = 500_000) -> tuple[bool, Optional[list[tuple[int, int]]], str]:
+        """
+        Check if the task is beatable.
+
+        Returns:
+            (is_beatable, solution_path_or_None, message)
+            solution_path is a list of (x, y) positions if beatable.
+        """
+        initial_block_pos = frozenset(
+            (bid, pos[0], pos[1]) for pos, bid in self.blocks.items()
+        )
+
+        initial_open_doors = frozenset(
+            d["id"] for pos, d in self.doors.items() if not d["locked"]
+        )
+
+        initial_state = ValidatorState(
+            agent_pos=self.start,
+            inventory=frozenset(),
+            active_switches=frozenset(),
+            open_gates=frozenset(self.gate_initial_open),
+            open_doors=initial_open_doors,
+            block_positions=initial_block_pos,
+        )
+
+        # BFS
+        queue = deque()
+        queue.append((initial_state, [self.start]))
+        visited: set[ValidatorState] = {initial_state}
+        states_explored = 0
+
+        while queue:
+            if states_explored >= max_states:
+                return False, None, f"State space exceeded {max_states} states without finding solution"
+
+            state, path = queue.popleft()
+            states_explored += 1
+
+            # Check goal
+            if state.agent_pos == self.goal:
+                return True, path, f"Solution found in {len(path)} steps ({states_explored} states explored)"
+
+            # Generate successor states by moving in 4 directions
+            for dx, dy in [(0, -1), (0, 1), (-1, 0), (1, 0)]:
+                nx, ny = state.agent_pos[0] + dx, state.agent_pos[1] + dy
+
+                if not (0 <= nx < self.width and 0 <= ny < self.height):
+                    continue
+
+                next_pos = (nx, ny)
+
+                # Can't walk into walls
+                if next_pos in self.walls:
+                    continue
+
+                # Can't walk into hazards
+                if next_pos in self.hazards:
+                    continue
+
+                # Current block positions as a dict for lookup
+                block_dict = {(bx, by): bid for bid, bx, by in state.block_positions}
+
+                # Check door
+                new_inventory = state.inventory
+                new_open_doors = state.open_doors
+                if next_pos in self.doors:
+                    door_info = self.doors[next_pos]
+                    if door_info["id"] not in state.open_doors:
+                        # Door is closed/locked - need matching key
+                        if door_info["color"] in state.inventory:
+                            # Open the door, optionally consume key
+                            new_open_doors = state.open_doors | {door_info["id"]}
+                            if self.key_consumption:
+                                # Remove one key of this color
+                                inv_list = list(state.inventory)
+                                inv_list.remove(door_info["color"])
+                                new_inventory = frozenset(inv_list)
+                        else:
+                            continue  # Can't pass
+
+                # Check gate
+                if next_pos in self.gates:
+                    gate_id = self.gates[next_pos]
+                    if gate_id not in state.open_gates:
+                        continue  # Closed gate, can't pass
+
+                # Check block at next_pos
+                new_block_positions = state.block_positions
+                if next_pos in block_dict:
+                    # Try to push block
+                    push_x, push_y = nx + dx, ny + dy
+                    push_pos = (push_x, push_y)
+                    # Block can't be pushed into walls, other blocks, doors, gates, hazards
+                    if (push_pos in self.walls or push_pos in block_dict or
+                            push_pos in self.doors or push_pos in self.gates or
+                            push_pos in self.hazards or
+                            not (0 <= push_x < self.width and 0 <= push_y < self.height)):
+                        continue  # Can't push
+                    bid = block_dict[next_pos]
+                    new_block_positions = (
+                        state.block_positions - {(bid, nx, ny)} | {(bid, push_x, push_y)}
+                    )
+
+                # Pick up key if present (and not already picked up - keys are on the grid)
+                if next_pos in self.keys:
+                    key_color = self.keys[next_pos]
+                    # Simple model: keys are auto-collected when walked over
+                    # (In actual MiniGrid, pickup is explicit, but for reachability this is equivalent
+                    #  since a rational agent would always pick up keys they encounter)
+                    new_inventory = new_inventory | {key_color}
+
+                # Toggle switch if present (walk onto switch cell)
+                new_active = state.active_switches
+                new_open_gates = state.open_gates
+                if next_pos in self.switches:
+                    sw = self.switches[next_pos]
+                    sw_id = sw["id"]
+                    if sw_id in state.active_switches:
+                        new_active = state.active_switches - {sw_id}
+                        # Close controlled gates
+                        new_open_gates = state.open_gates - frozenset(sw["controls"])
+                    else:
+                        new_active = state.active_switches | {sw_id}
+                        # Open controlled gates
+                        new_open_gates = state.open_gates | frozenset(sw["controls"])
+
+                # Handle teleporter
+                actual_pos = next_pos
+                if next_pos in self.teleporter_map:
+                    actual_pos = self.teleporter_map[next_pos]
+
+                new_state = ValidatorState(
+                    agent_pos=actual_pos,
+                    inventory=new_inventory,
+                    active_switches=new_active,
+                    open_gates=new_open_gates,
+                    open_doors=new_open_doors,
+                    block_positions=new_block_positions,
+                )
+
+                if new_state not in visited:
+                    visited.add(new_state)
+                    queue.append((new_state, path + [actual_pos]))
+
+        return False, None, f"No solution found ({states_explored} states explored, all reachable states checked)"
+
+
+@dataclass
+class DifficultyReport:
+    """Difficulty metrics for a task."""
+    task_id: str
+    tier: int
+    is_beatable: bool
+    optimal_steps: int  # BFS shortest path length (0 if unbeatable)
+    states_explored: int  # BFS search space size
+    mechanism_count: int  # total interactive objects
+    mechanism_types: int  # number of distinct mechanism categories used
+    dependency_depth: int  # longest chain: key->door, switch->gate, etc.
+    grid_area: int  # width * height
+    difficulty_score: float  # composite score
+
+    def to_dict(self) -> dict:
+        return {
+            "task_id": self.task_id,
+            "tier": self.tier,
+            "is_beatable": self.is_beatable,
+            "optimal_steps": self.optimal_steps,
+            "states_explored": self.states_explored,
+            "mechanism_count": self.mechanism_count,
+            "mechanism_types": self.mechanism_types,
+            "dependency_depth": self.dependency_depth,
+            "grid_area": self.grid_area,
+            "difficulty_score": round(self.difficulty_score, 2),
+        }
+
+
+def compute_difficulty(spec: TaskSpecification) -> DifficultyReport:
+    """Compute difficulty metrics for a task specification."""
+    validator = TaskValidator(spec)
+    is_beatable, solution, message = validator.validate()
+
+    optimal_steps = len(solution) - 1 if solution else 0  # -1 because path includes start
+    # Extract states_explored from message
+    import re
+    match = re.search(r"(\d+) states explored", message)
+    states_explored = int(match.group(1)) if match else 0
+
+    # Count mechanisms
+    m = spec.mechanisms
+    keys_count = len(m.keys)
+    doors_count = len(m.doors)
+    switches_count = len(m.switches)
+    gates_count = len(m.gates)
+    blocks_count = len(m.blocks)
+    teleporters_count = len(m.teleporters)
+    hazards_count = len(m.hazards)
+    mechanism_count = (keys_count + doors_count + switches_count +
+                       gates_count + blocks_count + teleporters_count + hazards_count)
+
+    # Count distinct mechanism types used
+    type_flags = [
+        keys_count > 0,
+        doors_count > 0,
+        switches_count > 0,
+        gates_count > 0,
+        blocks_count > 0,
+        teleporters_count > 0,
+        hazards_count > 0,
+    ]
+    mechanism_types = sum(type_flags)
+
+    # Compute dependency depth (longest chain)
+    # key -> door = depth 1, switch -> gate = depth 1
+    # key + switch -> gate -> door = depth 2
+    depth = 0
+    if doors_count > 0 and keys_count > 0:
+        depth = max(depth, 1)
+    if gates_count > 0 and switches_count > 0:
+        depth = max(depth, 1)
+    if doors_count > 0 and keys_count > 0 and gates_count > 0 and switches_count > 0:
+        depth = max(depth, 2)  # Must handle both systems
+    if blocks_count > 0:
+        depth = max(depth, 1)
+    if teleporters_count > 0:
+        depth = max(depth, 1)
+    if (teleporters_count > 0 or blocks_count > 0) and (gates_count > 0 or doors_count > 0):
+        depth = max(depth, 2)
+
+    w, h = spec.maze.dimensions
+    grid_area = w * h
+
+    # Composite difficulty score:
+    # Weighted combination of optimal path length, mechanism complexity,
+    # state space size, and grid size
+    score = (
+        optimal_steps * 1.0 +          # path length (primary)
+        mechanism_count * 2.0 +         # mechanism density
+        mechanism_types * 3.0 +         # variety bonus
+        depth * 5.0 +                   # dependency chain bonus
+        (states_explored / 100.0) +     # search complexity
+        (grid_area / 50.0)              # spatial scale
+    )
+
+    return DifficultyReport(
+        task_id=spec.task_id,
+        tier=spec.difficulty_tier,
+        is_beatable=is_beatable,
+        optimal_steps=optimal_steps,
+        states_explored=states_explored,
+        mechanism_count=mechanism_count,
+        mechanism_types=mechanism_types,
+        dependency_depth=depth,
+        grid_area=grid_area,
+        difficulty_score=score,
+    )
+
+
+def validate_task_file(path: str, verbose: bool = True) -> bool:
+    """Validate a single task file and report difficulty."""
+    spec = TaskSpecification.from_json(path)
+    report = compute_difficulty(spec)
+
+    if verbose:
+        status = "PASS" if report.is_beatable else "FAIL"
+        print(f"[{status}] {spec.task_id}: optimal={report.optimal_steps} steps, "
+              f"mechanisms={report.mechanism_count} ({report.mechanism_types} types), "
+              f"depth={report.dependency_depth}, score={report.difficulty_score}")
+
+    return report.is_beatable
+
+
+def validate_all_tasks(tasks_dir: str = "gridworld/tasks", verbose: bool = True) -> dict:
+    """Validate all task files across all tiers and report difficulty."""
+    import json
+    from pathlib import Path
+
+    results = {"pass": [], "fail": [], "reports": []}
+    tasks_path = Path(tasks_dir)
+
+    for tier in range(1, 6):
+        tier_dir = tasks_path / f"tier{tier}"
+        if not tier_dir.exists():
+            continue
+
+        if verbose:
+            print(f"\n=== Tier {tier} ===")
+
+        for task_file in sorted(tier_dir.glob("*.json")):
+            spec = TaskSpecification.from_json(str(task_file))
+            report = compute_difficulty(spec)
+            results["reports"].append(report.to_dict())
+
+            if verbose:
+                status = "PASS" if report.is_beatable else "FAIL"
+                print(f"  [{status}] {report.task_id}: optimal={report.optimal_steps} steps, "
+                      f"mechanisms={report.mechanism_count}, score={report.difficulty_score}")
+
+            if report.is_beatable:
+                results["pass"].append(str(task_file))
+            else:
+                results["fail"].append(str(task_file))
+
+    if verbose:
+        total = len(results["pass"]) + len(results["fail"])
+        print(f"\n=== Summary: {len(results['pass'])}/{total} tasks beatable ===")
+        if results["fail"]:
+            print("Failed tasks:")
+            for f in results["fail"]:
+                print(f"  - {f}")
+
+        # Print difficulty ranking
+        print("\n=== Difficulty Ranking ===")
+        sorted_reports = sorted(results["reports"], key=lambda r: r["difficulty_score"])
+        for r in sorted_reports:
+            print(f"  {r['difficulty_score']:6.1f}  T{r['tier']}  {r['task_id']}")
+
+    return results
+
+
+if __name__ == "__main__":
+    import sys
+    import os
+    sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+    validate_all_tasks()
diff --git a/src/v1_1/gridworld/tasks/tier1/maze_corridor_002.json b/src/v1_1/gridworld/tasks/tier1/maze_corridor_002.json
new file mode 100644
index 00000000..e06a3c5a
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier1/maze_corridor_002.json
@@ -0,0 +1,38 @@
+{
+  "task_id": "tier1_maze_corridor_002",
+  "version": "1.0",
+  "seed": 123,
+  "difficulty_tier": 1,
+  "description": "Navigate through a corridor with walls",
+  "maze": {
+    "dimensions": [10, 6],
+    "walls": [
+      [2, 1], [2, 2], [2, 3],
+      [4, 2], [4, 3], [4, 4],
+      [6, 1], [6, 2], [6, 3],
+      [8, 2], [8, 3], [8, 4]
+    ],
+    "start": [1, 1],
+    "goal": [8, 1]
+  },
+  "mechanisms": {
+    "keys": [],
+    "doors": [],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [8, 1],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 50
+}
diff --git a/src/v1_1/gridworld/tasks/tier1/maze_rooms_003.json b/src/v1_1/gridworld/tasks/tier1/maze_rooms_003.json
new file mode 100644
index 00000000..91626332
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier1/maze_rooms_003.json
@@ -0,0 +1,36 @@
+{
+  "task_id": "tier1_maze_rooms_003",
+  "version": "1.1",
+  "seed": 456,
+  "difficulty_tier": 1,
+  "description": "Navigate through four connected rooms with doorways",
+  "maze": {
+    "dimensions": [12, 12],
+    "walls": [
+      [5, 1], [5, 2], [5, 4], [5, 5], [5, 6], [5, 7], [5, 9], [5, 10],
+      [1, 5], [2, 5], [4, 5], [5, 5], [6, 5], [7, 5], [9, 5], [10, 5]
+    ],
+    "start": [1, 1],
+    "goal": [10, 10]
+  },
+  "mechanisms": {
+    "keys": [],
+    "doors": [],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [10, 10],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 100
+}
diff --git a/src/v1_1/gridworld/tasks/tier1/maze_simple_001.json b/src/v1_1/gridworld/tasks/tier1/maze_simple_001.json
new file mode 100644
index 00000000..e644da8c
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier1/maze_simple_001.json
@@ -0,0 +1,33 @@
+{
+  "task_id": "tier1_maze_simple_001",
+  "version": "1.0",
+  "seed": 42,
+  "difficulty_tier": 1,
+  "description": "Simple navigation: reach the goal in an empty room",
+  "maze": {
+    "dimensions": [8, 8],
+    "walls": [],
+    "start": [1, 1],
+    "goal": [6, 6]
+  },
+  "mechanisms": {
+    "keys": [],
+    "doors": [],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [6, 6],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 50
+}
diff --git a/src/v1_1/gridworld/tasks/tier2/colored_doors_003.json b/src/v1_1/gridworld/tasks/tier2/colored_doors_003.json
new file mode 100644
index 00000000..f8913702
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier2/colored_doors_003.json
@@ -0,0 +1,42 @@
+{
+  "task_id": "tier2_colored_doors_003",
+  "version": "1.0",
+  "seed": 789,
+  "difficulty_tier": 2,
+  "description": "Multiple colored keys and doors - match colors correctly",
+  "maze": {
+    "dimensions": [10, 10],
+    "walls": [
+      [4, 1], [4, 2], [4, 4], [4, 5], [4, 6], [4, 7], [4, 8],
+      [7, 1], [7, 2], [7, 3], [7, 5], [7, 6], [7, 7], [7, 8]
+    ],
+    "start": [1, 1],
+    "goal": [8, 8]
+  },
+  "mechanisms": {
+    "keys": [
+      {"id": "key_blue", "position": [2, 8], "color": "blue"},
+      {"id": "key_green", "position": [2, 4], "color": "green"}
+    ],
+    "doors": [
+      {"id": "door_green", "position": [4, 3], "requires_key": "green", "initial_state": "locked"},
+      {"id": "door_blue", "position": [7, 4], "requires_key": "blue", "initial_state": "locked"}
+    ],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [8, 8],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 100
+}
diff --git a/src/v1_1/gridworld/tasks/tier2/multi_key_002.json b/src/v1_1/gridworld/tasks/tier2/multi_key_002.json
new file mode 100644
index 00000000..e1a4496e
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier2/multi_key_002.json
@@ -0,0 +1,42 @@
+{
+  "task_id": "tier2_multi_key_002",
+  "version": "1.0",
+  "seed": 123,
+  "difficulty_tier": 2,
+  "description": "Collect keys in order: blue door blocks red key, red door blocks goal",
+  "maze": {
+    "dimensions": [10, 8],
+    "walls": [
+      [3, 1], [3, 2], [3, 4], [3, 5], [3, 6],
+      [6, 1], [6, 2], [6, 4], [6, 5], [6, 6]
+    ],
+    "start": [1, 1],
+    "goal": [8, 6]
+  },
+  "mechanisms": {
+    "keys": [
+      {"id": "key_blue", "position": [1, 5], "color": "blue"},
+      {"id": "key_red", "position": [4, 3], "color": "red"}
+    ],
+    "doors": [
+      {"id": "door_blue", "position": [3, 3], "requires_key": "blue", "initial_state": "locked"},
+      {"id": "door_red", "position": [6, 3], "requires_key": "red", "initial_state": "locked"}
+    ],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [8, 6],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 75
+}
diff --git a/src/v1_1/gridworld/tasks/tier2/single_key_001.json b/src/v1_1/gridworld/tasks/tier2/single_key_001.json
new file mode 100644
index 00000000..54f84e64
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier2/single_key_001.json
@@ -0,0 +1,39 @@
+{
+  "task_id": "tier2_single_key_001",
+  "version": "1.0",
+  "seed": 42,
+  "difficulty_tier": 2,
+  "description": "Collect the blue key to unlock the blue door and reach the goal",
+  "maze": {
+    "dimensions": [8, 8],
+    "walls": [
+      [4, 1], [4, 2], [4, 4], [4, 5], [4, 6]
+    ],
+    "start": [1, 1],
+    "goal": [6, 6]
+  },
+  "mechanisms": {
+    "keys": [
+      {"id": "key_blue", "position": [2, 3], "color": "blue"}
+    ],
+    "doors": [
+      {"id": "door_blue", "position": [4, 3], "requires_key": "blue", "initial_state": "locked"}
+    ],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [6, 6],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 50
+}
diff --git a/src/v1_1/gridworld/tasks/tier3/complex_deps_003.json b/src/v1_1/gridworld/tasks/tier3/complex_deps_003.json
new file mode 100644
index 00000000..39f66a09
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier3/complex_deps_003.json
@@ -0,0 +1,47 @@
+{
+  "task_id": "tier3_complex_deps_003",
+  "version": "1.0",
+  "seed": 456,
+  "difficulty_tier": 3,
+  "description": "Keys, doors, switches, and gates - complex dependency chain",
+  "maze": {
+    "dimensions": [14, 12],
+    "walls": [
+      [4, 1], [4, 2], [4, 4], [4, 5], [4, 6], [4, 7], [4, 8], [4, 9], [4, 10],
+      [7, 1], [7, 2], [7, 3], [7, 5], [7, 6], [7, 7], [7, 8], [7, 9], [7, 10],
+      [10, 1], [10, 2], [10, 3], [10, 4], [10, 6], [10, 7], [10, 8], [10, 9], [10, 10]
+    ],
+    "start": [1, 1],
+    "goal": [12, 10]
+  },
+  "mechanisms": {
+    "keys": [
+      {"id": "key_blue", "position": [2, 8], "color": "blue"},
+      {"id": "key_red", "position": [5, 5], "color": "red"}
+    ],
+    "doors": [
+      {"id": "door_blue", "position": [4, 3], "requires_key": "blue", "initial_state": "locked"},
+      {"id": "door_red", "position": [7, 4], "requires_key": "red", "initial_state": "locked"}
+    ],
+    "switches": [
+      {"id": "switch_main", "position": [8, 8], "controls": ["gate_final"], "switch_type": "toggle", "initial_state": "off"}
+    ],
+    "gates": [
+      {"id": "gate_final", "position": [10, 5], "initial_state": "closed"}
+    ],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [12, 10],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 150
+}
diff --git a/src/v1_1/gridworld/tasks/tier3/gates_switches_002.json b/src/v1_1/gridworld/tasks/tier3/gates_switches_002.json
new file mode 100644
index 00000000..38b628da
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier3/gates_switches_002.json
@@ -0,0 +1,42 @@
+{
+  "task_id": "tier3_gates_switches_002",
+  "version": "1.0",
+  "seed": 123,
+  "difficulty_tier": 3,
+  "description": "Multiple switches control multiple gates - activate in correct order",
+  "maze": {
+    "dimensions": [12, 10],
+    "walls": [
+      [4, 1], [4, 2], [4, 4], [4, 5], [4, 6], [4, 7], [4, 8],
+      [8, 1], [8, 2], [8, 3], [8, 5], [8, 6], [8, 7], [8, 8]
+    ],
+    "start": [1, 1],
+    "goal": [10, 8]
+  },
+  "mechanisms": {
+    "keys": [],
+    "doors": [],
+    "switches": [
+      {"id": "switch_a", "position": [2, 6], "controls": ["gate_1"], "switch_type": "toggle", "initial_state": "off"},
+      {"id": "switch_b", "position": [6, 2], "controls": ["gate_2"], "switch_type": "toggle", "initial_state": "off"}
+    ],
+    "gates": [
+      {"id": "gate_1", "position": [4, 3], "initial_state": "closed"},
+      {"id": "gate_2", "position": [8, 4], "initial_state": "closed"}
+    ],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [10, 8],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 100
+}
diff --git a/src/v1_1/gridworld/tasks/tier3/key_switch_001.json b/src/v1_1/gridworld/tasks/tier3/key_switch_001.json
new file mode 100644
index 00000000..3d2bf63f
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier3/key_switch_001.json
@@ -0,0 +1,44 @@
+{
+  "task_id": "tier3_key_switch_001",
+  "version": "1.0",
+  "seed": 42,
+  "difficulty_tier": 3,
+  "description": "Collect key to open door, then press switch to open gate to reach goal",
+  "maze": {
+    "dimensions": [10, 8],
+    "walls": [
+      [3, 1], [3, 2], [3, 4], [3, 5], [3, 6],
+      [6, 1], [6, 2], [6, 3], [6, 5], [6, 6]
+    ],
+    "start": [1, 1],
+    "goal": [8, 6]
+  },
+  "mechanisms": {
+    "keys": [
+      {"id": "key_blue", "position": [1, 5], "color": "blue"}
+    ],
+    "doors": [
+      {"id": "door_blue", "position": [3, 3], "requires_key": "blue", "initial_state": "locked"}
+    ],
+    "switches": [
+      {"id": "switch_1", "position": [4, 5], "controls": ["gate_1"], "switch_type": "toggle", "initial_state": "off"}
+    ],
+    "gates": [
+      {"id": "gate_1", "position": [6, 4], "initial_state": "closed"}
+    ],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [8, 6],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 75
+}
diff --git a/src/v1_1/gridworld/tasks/tier4/blocked_path_002.json b/src/v1_1/gridworld/tasks/tier4/blocked_path_002.json
new file mode 100644
index 00000000..188e1e5a
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier4/blocked_path_002.json
@@ -0,0 +1,40 @@
+{
+  "task_id": "tier4_blocked_path_002",
+  "version": "1.0",
+  "seed": 123,
+  "difficulty_tier": 4,
+  "description": "Push blocks to clear a path - wrong moves can block progress",
+  "maze": {
+    "dimensions": [10, 8],
+    "walls": [
+      [1, 4], [2, 4], [3, 4],
+      [5, 4], [6, 4], [7, 4], [8, 4],
+      [5, 1], [5, 2], [5, 6]
+    ],
+    "start": [1, 1],
+    "goal": [8, 6]
+  },
+  "mechanisms": {
+    "keys": [],
+    "doors": [],
+    "switches": [],
+    "gates": [],
+    "blocks": [
+      {"id": "block_a", "position": [4, 4], "pushable": true, "color": "grey"},
+      {"id": "block_b", "position": [5, 3], "pushable": true, "color": "blue"}
+    ],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [8, 6],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 75
+}
diff --git a/src/v1_1/gridworld/tasks/tier4/consumable_003.json b/src/v1_1/gridworld/tasks/tier4/consumable_003.json
new file mode 100644
index 00000000..7cc67373
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier4/consumable_003.json
@@ -0,0 +1,42 @@
+{
+  "task_id": "tier4_consumable_003",
+  "version": "1.1",
+  "seed": 456,
+  "difficulty_tier": 4,
+  "description": "Keys are consumed when used. One key, two doors - only one leads to the goal. Choose wisely.",
+  "maze": {
+    "dimensions": [12, 10],
+    "walls": [
+      [4, 1], [4, 2], [4, 4], [4, 5], [4, 6], [4, 7], [4, 8],
+      [8, 1], [8, 2], [8, 4], [8, 6], [8, 7], [8, 8],
+      [9, 4], [10, 1], [10, 2], [10, 3], [10, 4]
+    ],
+    "start": [1, 1],
+    "goal": [10, 8]
+  },
+  "mechanisms": {
+    "keys": [
+      {"id": "key_blue", "position": [2, 6], "color": "blue"}
+    ],
+    "doors": [
+      {"id": "door_blue_trap", "position": [8, 3], "requires_key": "blue", "initial_state": "locked"},
+      {"id": "door_blue_goal", "position": [8, 5], "requires_key": "blue", "initial_state": "locked"}
+    ],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [10, 8],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 100
+}
diff --git a/src/v1_1/gridworld/tasks/tier4/push_block_001.json b/src/v1_1/gridworld/tasks/tier4/push_block_001.json
new file mode 100644
index 00000000..6ba680cf
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier4/push_block_001.json
@@ -0,0 +1,37 @@
+{
+  "task_id": "tier4_push_block_001",
+  "version": "1.1",
+  "seed": 42,
+  "difficulty_tier": 4,
+  "description": "Push the block out of the way to clear the passage and reach the goal",
+  "maze": {
+    "dimensions": [10, 8],
+    "walls": [
+      [4, 1], [4, 2], [4, 5], [4, 6]
+    ],
+    "start": [1, 1],
+    "goal": [8, 6]
+  },
+  "mechanisms": {
+    "keys": [],
+    "doors": [],
+    "switches": [],
+    "gates": [],
+    "blocks": [
+      {"id": "block_1", "position": [4, 3], "pushable": true, "color": "grey"}
+    ],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [8, 6],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 50
+}
diff --git a/src/v1_1/gridworld/tasks/tier5/hidden_switch_001.json b/src/v1_1/gridworld/tasks/tier5/hidden_switch_001.json
new file mode 100644
index 00000000..461321d6
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier5/hidden_switch_001.json
@@ -0,0 +1,41 @@
+{
+  "task_id": "tier5_hidden_switch_001",
+  "version": "1.0",
+  "seed": 42,
+  "difficulty_tier": 5,
+  "description": "A switch controls the gate but the connection is not visible - must infer from trial",
+  "maze": {
+    "dimensions": [10, 8],
+    "walls": [
+      [5, 1], [5, 2], [5, 4], [5, 5], [5, 6]
+    ],
+    "start": [1, 1],
+    "goal": [8, 6]
+  },
+  "mechanisms": {
+    "keys": [],
+    "doors": [],
+    "switches": [
+      {"id": "hidden_switch", "position": [2, 5], "controls": ["secret_gate"], "switch_type": "toggle", "initial_state": "off"}
+    ],
+    "gates": [
+      {"id": "secret_gate", "position": [5, 3], "initial_state": "closed"}
+    ],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": ["hidden_switch"],
+    "observability": "view_cone",
+    "view_size": 5
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [8, 6],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 75
+}
diff --git a/src/v1_1/gridworld/tasks/tier5/infer_color_002.json b/src/v1_1/gridworld/tasks/tier5/infer_color_002.json
new file mode 100644
index 00000000..7d1b2f4a
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier5/infer_color_002.json
@@ -0,0 +1,41 @@
+{
+  "task_id": "tier5_infer_color_002",
+  "version": "1.0",
+  "seed": 123,
+  "difficulty_tier": 5,
+  "description": "Door color must be inferred - try keys to discover which works",
+  "maze": {
+    "dimensions": [10, 8],
+    "walls": [
+      [5, 1], [5, 2], [5, 4], [5, 5], [5, 6]
+    ],
+    "start": [1, 1],
+    "goal": [8, 6]
+  },
+  "mechanisms": {
+    "keys": [
+      {"id": "key_red", "position": [2, 2], "color": "red"},
+      {"id": "key_blue", "position": [2, 5], "color": "blue"},
+      {"id": "key_green", "position": [3, 3], "color": "green"}
+    ],
+    "doors": [
+      {"id": "mystery_door", "position": [5, 3], "requires_key": "blue", "initial_state": "locked"}
+    ],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": []
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": ["mystery_door"]
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [8, 6],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 100
+}
diff --git a/src/v1_1/gridworld/tasks/tier5/memory_003.json b/src/v1_1/gridworld/tasks/tier5/memory_003.json
new file mode 100644
index 00000000..3df7d330
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier5/memory_003.json
@@ -0,0 +1,49 @@
+{
+  "task_id": "tier5_memory_003",
+  "version": "1.1",
+  "seed": 456,
+  "difficulty_tier": 5,
+  "description": "Complex multi-step puzzle: activate switch, collect key, navigate hazards, unlock door to reach goal",
+  "maze": {
+    "dimensions": [12, 10],
+    "walls": [
+      [4, 1], [4, 2], [4, 3], [4, 5], [4, 6], [4, 7], [4, 8],
+      [8, 1], [8, 2], [8, 3], [8, 4], [8, 6], [8, 7], [8, 8]
+    ],
+    "start": [1, 1],
+    "goal": [10, 8]
+  },
+  "mechanisms": {
+    "keys": [
+      {"id": "key_purple", "position": [2, 7], "color": "purple"}
+    ],
+    "doors": [
+      {"id": "door_purple", "position": [8, 5], "requires_key": "purple", "initial_state": "locked"}
+    ],
+    "switches": [
+      {"id": "switch_a", "position": [2, 2], "controls": ["gate_a"], "switch_type": "toggle", "initial_state": "off"}
+    ],
+    "gates": [
+      {"id": "gate_a", "position": [4, 4], "initial_state": "closed"}
+    ],
+    "blocks": [],
+    "teleporters": [],
+    "hazards": [
+      {"id": "hazard_1", "position": [6, 6], "hazard_type": "lava"},
+      {"id": "hazard_2", "position": [7, 6], "hazard_type": "lava"}
+    ]
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": [],
+    "observability": "fog_of_war",
+    "view_size": 7
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [10, 8],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 150
+}
diff --git a/src/v1_1/gridworld/tasks/tier5/teleporter_004.json b/src/v1_1/gridworld/tasks/tier5/teleporter_004.json
new file mode 100644
index 00000000..e675f9dc
--- /dev/null
+++ b/src/v1_1/gridworld/tasks/tier5/teleporter_004.json
@@ -0,0 +1,52 @@
+{
+  "task_id": "tier5_teleporter_004",
+  "version": "1.0",
+  "seed": 42,
+  "difficulty_tier": 5,
+  "description": "Use teleporters to bypass wall barriers and reach the goal. A bidirectional teleporter connects two isolated chambers.",
+  "maze": {
+    "dimensions": [12, 10],
+    "walls": [
+      [4, 1], [4, 2], [4, 3], [4, 4], [4, 5], [4, 6], [4, 7], [4, 8],
+      [8, 1], [8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 7], [8, 8]
+    ],
+    "start": [1, 1],
+    "goal": [10, 8]
+  },
+  "mechanisms": {
+    "keys": [],
+    "doors": [],
+    "switches": [],
+    "gates": [],
+    "blocks": [],
+    "teleporters": [
+      {
+        "id": "portal_1",
+        "position_a": [2, 5],
+        "position_b": [6, 5],
+        "bidirectional": true
+      },
+      {
+        "id": "portal_2",
+        "position_a": [6, 3],
+        "position_b": [10, 3],
+        "bidirectional": false
+      }
+    ],
+    "hazards": [
+      {"id": "lava_1", "position": [6, 7], "hazard_type": "lava"},
+      {"id": "lava_2", "position": [6, 8], "hazard_type": "lava"}
+    ]
+  },
+  "rules": {
+    "key_consumption": true,
+    "switch_type": "toggle",
+    "hidden_mechanisms": []
+  },
+  "goal": {
+    "type": "reach_position",
+    "target": [10, 8],
+    "auxiliary_conditions": []
+  },
+  "max_steps": 100
+}
diff --git a/src/v1_1/gridworld/test_minigrid.py b/src/v1_1/gridworld/test_minigrid.py
new file mode 100644
index 00000000..8e19c8ff
--- /dev/null
+++ b/src/v1_1/gridworld/test_minigrid.py
@@ -0,0 +1,292 @@
+#!/usr/bin/env python3
+"""
+Test script for MiniGrid domain implementation.
+
+Verifies that:
+1. Task specifications load correctly
+2. Environments can be created from specs
+3. Actions execute properly
+4. Rendering works
+"""
+
+import sys
+from pathlib import Path
+import numpy as np
+
+# Add parent directory to path
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+def test_task_spec_loading():
+    """Test loading task specifications from JSON."""
+    print("\n=== Testing Task Specification Loading ===")
+
+    from v1_1.gridworld.task_spec import TaskSpecification
+
+    # Test loading tier1 task
+    tier1_path = Path(__file__).parent / "tasks" / "tier1" / "maze_simple_001.json"
+    if tier1_path.exists():
+        spec = TaskSpecification.from_json(str(tier1_path))
+        print(f"✓ Loaded task: {spec.task_id}")
+        print(f"  Tier: {spec.difficulty_tier}")
+        print(f"  Dimensions: {spec.maze.dimensions}")
+        print(f"  Start: {spec.maze.start.to_tuple()}")
+        print(f"  Goal: {spec.maze.goal.to_tuple()}")
+        print(f"  Max steps: {spec.max_steps}")
+
+        # Test validation
+        is_valid, errors = spec.validate()
+        if is_valid:
+            print(f"✓ Validation passed")
+        else:
+            print(f"✗ Validation failed: {errors}")
+
+        # Test mission text generation
+        mission = spec.get_mission_text()
+        print(f"  Mission: {mission}")
+    else:
+        print(f"✗ Task file not found: {tier1_path}")
+
+    return True
+
+
+def test_task_parser():
+    """Test parsing task specs into environments."""
+    print("\n=== Testing Task Parser ===")
+
+    from v1_1.gridworld.task_spec import TaskSpecification
+    from v1_1.gridworld.task_parser import TaskParser
+
+    parser = TaskParser(render_mode="rgb_array")
+
+    # Test tier 1
+    tier1_path = Path(__file__).parent / "tasks" / "tier1" / "maze_simple_001.json"
+    if tier1_path.exists():
+        spec = TaskSpecification.from_json(str(tier1_path))
+        env = parser.parse(spec)
+        print(f"✓ Created environment for {spec.task_id}")
+        print(f"  Grid size: {env.width}x{env.height}")
+        print(f"  Agent position: {env.agent_pos}")
+        print(f"  Agent direction: {env.agent_dir}")
+
+        # Test reset
+        obs, info = env.reset(seed=42)
+        print(f"✓ Environment reset successful")
+
+        # Test render
+        img = env.render()
+        print(f"✓ Rendered image shape: {img.shape}")
+
+        env.close()
+    else:
+        print(f"✗ Task file not found: {tier1_path}")
+
+    return True
+
+
+def test_environment_step():
+    """Test taking steps in the environment."""
+    print("\n=== Testing Environment Step ===")
+
+    from v1_1.gridworld.task_spec import TaskSpecification
+    from v1_1.gridworld.task_parser import TaskParser
+    from v1_1.gridworld.actions import MiniGridActions, ACTION_NAMES
+
+    parser = TaskParser(render_mode="rgb_array")
+
+    tier1_path = Path(__file__).parent / "tasks" / "tier1" / "maze_simple_001.json"
+    if tier1_path.exists():
+        spec = TaskSpecification.from_json(str(tier1_path))
+        env = parser.parse(spec)
+        obs, info = env.reset(seed=42)
+
+        print(f"Starting position: {env.agent_pos}")
+
+        # Take a few steps
+        actions = [
+            MiniGridActions.TURN_RIGHT,
+            MiniGridActions.MOVE_FORWARD,
+            MiniGridActions.MOVE_FORWARD,
+            MiniGridActions.TURN_LEFT,
+            MiniGridActions.MOVE_FORWARD,
+        ]
+
+        total_reward = 0
+        for i, action in enumerate(actions):
+            obs, reward, terminated, truncated, info = env.step(action)
+            total_reward += reward
+            action_name = ACTION_NAMES.get(action, f"action_{action}")
+            print(f"  Step {i+1}: {action_name} -> pos={env.agent_pos}, reward={reward:.3f}, done={terminated or truncated}")
+
+            if terminated or truncated:
+                break
+
+        print(f"✓ Completed {len(actions)} steps, total reward: {total_reward:.3f}")
+        env.close()
+    else:
+        print(f"✗ Task file not found: {tier1_path}")
+
+    return True
+
+
+def test_backend():
+    """Test the MiniGrid backend wrapper."""
+    print("\n=== Testing MiniGrid Backend ===")
+
+    from v1_1.gridworld.task_spec import TaskSpecification
+    from v1_1.gridworld.backends.minigrid_backend import MiniGridBackend
+
+    backend = MiniGridBackend(render_mode="rgb_array")
+
+    tier2_path = Path(__file__).parent / "tasks" / "tier2" / "single_key_001.json"
+    if tier2_path.exists():
+        spec = TaskSpecification.from_json(str(tier2_path))
+        backend.configure(spec)
+
+        obs, state, info = backend.reset(seed=42)
+        print(f"✓ Backend reset successful")
+        print(f"  Agent position: {state.agent_position}")
+        print(f"  Agent direction: {state.agent_direction}")
+        print(f"  Observation shape: {obs.shape}")
+
+        # Take a step
+        obs, reward, terminated, truncated, state, info = backend.step(2)  # Move forward
+        print(f"✓ Backend step successful")
+        print(f"  New position: {state.agent_position}")
+
+        # Get mission
+        mission = backend.get_mission_text()
+        print(f"  Mission: {mission}")
+
+        backend.close()
+    else:
+        print(f"✗ Task file not found: {tier2_path}")
+
+    return True
+
+
+def test_runner():
+    """Test the grid runner."""
+    print("\n=== Testing Grid Runner ===")
+
+    from v1_1.gridworld.task_spec import TaskSpecification
+    from v1_1.gridworld.runner.grid_runner import GridRunner
+
+    runner = GridRunner(render_mode="rgb_array")
+
+    tier1_path = Path(__file__).parent / "tasks" / "tier1" / "maze_simple_001.json"
+    if tier1_path.exists():
+        spec = TaskSpecification.from_json(str(tier1_path))
+
+        # Run episode with random policy
+        result = runner.run_episode(spec, policy_fn=None, verbose=False)
+        print(f"✓ Episode completed: {spec.task_id}")
+        print(f"  Success: {result.success}")
+        print(f"  Steps taken: {result.steps_taken}")
+        print(f"  Total reward: {result.total_reward:.3f}")
+        print(f"  Terminated: {result.terminated}")
+        print(f"  Truncated: {result.truncated}")
+        print(f"  Trajectory length: {len(result.trajectory)}")
+
+        runner.close()
+    else:
+        print(f"✗ Task file not found: {tier1_path}")
+
+    return True
+
+
+def test_tier_envs():
+    """Test loading environments by tier."""
+    print("\n=== Testing Tier Environment Loading ===")
+
+    from v1_1.gridworld.envs.tier_envs import list_available_envs, get_tier1_envs
+
+    # List available
+    available = list_available_envs()
+    for tier, tasks in available.items():
+        print(f"  {tier}: {len(tasks)} tasks - {tasks}")
+
+    # Load tier 1
+    tier1_envs = get_tier1_envs(render_mode="rgb_array")
+    print(f"✓ Loaded {len(tier1_envs)} tier 1 environments")
+
+    for spec, env in tier1_envs:
+        print(f"  - {spec.task_id}: {spec.maze.dimensions}")
+        env.close()
+
+    return True
+
+
+def test_all_tiers():
+    """Test that all tier tasks load correctly."""
+    print("\n=== Testing All Tier Tasks ===")
+
+    from v1_1.gridworld.task_spec import TaskSpecification
+    from v1_1.gridworld.task_parser import TaskParser
+
+    parser = TaskParser(render_mode="rgb_array")
+    tasks_dir = Path(__file__).parent / "tasks"
+
+    for tier_num in range(1, 6):
+        tier_dir = tasks_dir / f"tier{tier_num}"
+        if tier_dir.exists():
+            task_files = list(tier_dir.glob("*.json"))
+            loaded = 0
+            for task_file in task_files:
+                try:
+                    spec = TaskSpecification.from_json(str(task_file))
+                    env = parser.parse(spec)
+                    obs, info = env.reset(seed=spec.seed)
+                    env.close()
+                    loaded += 1
+                except Exception as e:
+                    print(f"  ✗ Failed to load {task_file.name}: {e}")
+
+            print(f"✓ Tier {tier_num}: {loaded}/{len(task_files)} tasks loaded successfully")
+        else:
+            print(f"  Tier {tier_num} directory not found")
+
+    return True
+
+
+def main():
+    """Run all tests."""
+    print("=" * 60)
+    print("MiniGrid Domain Implementation Tests")
+    print("=" * 60)
+
+    tests = [
+        ("Task Specification Loading", test_task_spec_loading),
+        ("Task Parser", test_task_parser),
+        ("Environment Step", test_environment_step),
+        ("MiniGrid Backend", test_backend),
+        ("Grid Runner", test_runner),
+        ("Tier Environments", test_tier_envs),
+        ("All Tiers", test_all_tiers),
+    ]
+
+    passed = 0
+    failed = 0
+
+    for name, test_fn in tests:
+        try:
+            result = test_fn()
+            if result:
+                passed += 1
+            else:
+                failed += 1
+        except Exception as e:
+            print(f"✗ {name} failed with exception: {e}")
+            import traceback
+            traceback.print_exc()
+            failed += 1
+
+    print("\n" + "=" * 60)
+    print(f"Results: {passed} passed, {failed} failed")
+    print("=" * 60)
+
+    return failed == 0
+
+
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)
diff --git a/src/v1_1/interactive_demo.py b/src/v1_1/interactive_demo.py
new file mode 100644
index 00000000..f08dc37d
--- /dev/null
+++ b/src/v1_1/interactive_demo.py
@@ -0,0 +1,430 @@
+#!/usr/bin/env python3
+"""
+Interactive pygame demo for MultiGrid.
+
+Controls:
+- Arrow Keys / WASD: Move agent (FORWARD in facing direction)
+- Q/E: Turn left/right
+- SPACE: Pick up / Drop object
+- P: Push object
+- R: Reset environment
+- 1/2/3: Switch between Square/Hex/Triangle grids
+- ESC: Quit
+"""
+
+import sys
+import os
+import pygame
+import math
+import numpy as np
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+from multigrid.env import MultiGridEnv
+from multigrid.agent import Action
+
+
+# Colors
+WHITE = (255, 255, 255)
+BLACK = (0, 0, 0)
+GRAY = (200, 200, 200)
+LIGHT_GRAY = (240, 240, 240)
+DARK_GRAY = (100, 100, 100)
+BLUE = (50, 100, 255)
+RED = (255, 50, 50)
+GREEN = (50, 255, 50)
+YELLOW = (255, 255, 50)
+PURPLE = (200, 50, 200)
+ORANGE = (255, 165, 0)
+
+
+def draw_hex(surface, center, size, color, filled=True):
+    """Draw a hexagon."""
+    vertices = []
+    for i in range(6):
+        angle = math.pi / 2 - i * math.pi / 3
+        x = center[0] + size * math.cos(angle)
+        y = center[1] - size * math.sin(angle)
+        vertices.append((x, y))
+
+    if filled:
+        pygame.draw.polygon(surface, color, vertices)
+    pygame.draw.polygon(surface, BLACK, vertices, 2)
+
+
+def draw_triangle(surface, center, size, color, pointing_up, filled=True):
+    """
+    Draw an equilateral triangle.
+
+    Args:
+        center: (x, y) position of triangle centroid
+        size: height of the triangle
+        pointing_up: True for upward pointing, False for downward
+    """
+    # For equilateral triangle with height h:
+    # - Side length s = 2h / sqrt(3)
+    # - Half of base = s / 2 = h / sqrt(3)
+    # - Centroid is h/3 from base, 2h/3 from apex
+
+    half_base = size / math.sqrt(3)
+
+    if pointing_up:
+        # Apex is 2/3 of height above centroid
+        # Base is 1/3 of height below centroid
+        vertices = [
+            (center[0], center[1] - 2 * size / 3),  # Top apex
+            (center[0] - half_base, center[1] + size / 3),  # Bottom left
+            (center[0] + half_base, center[1] + size / 3)   # Bottom right
+        ]
+    else:
+        # Apex is 2/3 of height below centroid
+        # Base is 1/3 of height above centroid
+        vertices = [
+            (center[0], center[1] + 2 * size / 3),  # Bottom apex
+            (center[0] - half_base, center[1] - size / 3),  # Top left
+            (center[0] + half_base, center[1] - size / 3)   # Top right
+        ]
+
+    if filled:
+        pygame.draw.polygon(surface, color, vertices)
+    pygame.draw.polygon(surface, BLACK, vertices, 2)
+
+
+def draw_square(surface, center, size, color, filled=True):
+    """Draw a square."""
+    rect = pygame.Rect(center[0] - size / 2, center[1] - size / 2, size, size)
+    if filled:
+        pygame.draw.rect(surface, color, rect)
+    pygame.draw.rect(surface, BLACK, rect, 2)
+
+
+def draw_agent(surface, center, size, facing_angle):
+    """Draw the agent as a triangle pointing in facing direction."""
+    # Draw body (circle)
+    pygame.draw.circle(surface, BLUE, (int(center[0]), int(center[1])), int(size * 0.6))
+
+    # Draw facing indicator (triangle)
+    indicator_size = size * 0.8
+    angle = facing_angle
+    vertices = [
+        (center[0] + indicator_size * math.cos(angle),
+         center[1] - indicator_size * math.sin(angle)),
+        (center[0] + indicator_size * 0.3 * math.cos(angle + 2.5),
+         center[1] - indicator_size * 0.3 * math.sin(angle + 2.5)),
+        (center[0] + indicator_size * 0.3 * math.cos(angle - 2.5),
+         center[1] - indicator_size * 0.3 * math.sin(angle - 2.5))
+    ]
+    pygame.draw.polygon(surface, WHITE, vertices)
+    pygame.draw.polygon(surface, BLACK, vertices, 1)
+
+
+def draw_object(surface, center, size, color):
+    """Draw an object (cube)."""
+    pygame.draw.circle(surface, color, (int(center[0]), int(center[1])), int(size * 0.5))
+    pygame.draw.circle(surface, BLACK, (int(center[0]), int(center[1])), int(size * 0.5), 2)
+
+
+class InteractiveDemo:
+    def __init__(self, width=800, height=800):
+        pygame.init()
+        self.width = width
+        self.height = height
+        self.screen = pygame.display.set_mode((width, height + 100))  # Extra space for info
+        pygame.display.set_caption("MultiGrid Interactive Demo")
+        self.clock = pygame.time.Clock()
+        self.font = pygame.font.Font(None, 24)
+        self.big_font = pygame.font.Font(None, 36)
+
+        self.tiling_type = "square"
+        self.grid_size = 10
+
+        self.env = None
+        self.reset_env()
+
+    def reset_env(self):
+        """Create/reset the environment."""
+        task_spec = {
+            "task_id": "interactive_demo",
+            "seed": 42,
+            "scene": {
+                "bounds": {"width": 1.0, "height": 1.0},
+                "objects": [
+                    {
+                        "id": "cube_red",
+                        "type": "movable",
+                        "color": "red",
+                        "position": {"x": 0.7, "y": 0.3},
+                        "size": 0.1
+                    },
+                    {
+                        "id": "cube_green",
+                        "type": "movable",
+                        "color": "green",
+                        "position": {"x": 0.3, "y": 0.7},
+                        "size": 0.1
+                    }
+                ],
+                "agent": {
+                    "position": {"x": 0.2, "y": 0.2},
+                    "facing": 1  # Facing east
+                }
+            },
+            "goal": {},
+            "limits": {"max_steps": 1000},
+            "tiling": {"type": self.tiling_type, "grid_size": {"width": self.grid_size, "height": self.grid_size}}
+        }
+
+        self.env = MultiGridEnv(task_spec, tiling=self.tiling_type)
+        self.env.reset()
+
+    def handle_input(self):
+        """Handle keyboard input."""
+        for event in pygame.event.get():
+            if event.type == pygame.QUIT:
+                return False
+            elif event.type == pygame.KEYDOWN:
+                if event.key == pygame.K_ESCAPE:
+                    return False
+                elif event.key == pygame.K_r:
+                    self.reset_env()
+                elif event.key == pygame.K_1:
+                    self.tiling_type = "square"
+                    self.reset_env()
+                elif event.key == pygame.K_2:
+                    self.tiling_type = "hex"
+                    self.reset_env()
+                elif event.key == pygame.K_3:
+                    self.tiling_type = "triangle"
+                    self.reset_env()
+                elif event.key in [pygame.K_UP, pygame.K_w]:
+                    self.env.step(Action.FORWARD)
+                elif event.key in [pygame.K_DOWN, pygame.K_s]:
+                    self.env.step(Action.BACKWARD)
+                elif event.key in [pygame.K_LEFT, pygame.K_a, pygame.K_q]:
+                    self.env.step(Action.TURN_LEFT)
+                elif event.key in [pygame.K_RIGHT, pygame.K_d, pygame.K_e]:
+                    self.env.step(Action.TURN_RIGHT)
+                elif event.key == pygame.K_SPACE:
+                    if self.env.state.agent.holding:
+                        self.env.step(Action.DROP)
+                    else:
+                        self.env.step(Action.PICKUP)
+                elif event.key == pygame.K_p:
+                    self.env.step(Action.PUSH)
+
+        return True
+
+    def draw_grid(self):
+        """Draw the grid."""
+        self.screen.fill(WHITE)
+
+        tiling = self.env.tiling
+
+        # Calculate proper cell sizes for each tiling type
+        margin = 50
+        usable_width = self.width - 2 * margin
+        usable_height = self.height - 2 * margin
+
+        # Draw grid cells
+        for cell_id, cell in tiling.cells.items():
+            x_norm, y_norm = cell.position_hint
+            x = x_norm * usable_width + margin
+            y = y_norm * usable_height + margin
+
+            if self.tiling_type == "square":
+                cell_size = usable_width / self.grid_size
+                draw_square(self.screen, (x, y), cell_size, LIGHT_GRAY, filled=True)
+            elif self.tiling_type == "hex":
+                # Calculate hex size matching HexTiling coordinate system
+                width_spacing = (self.grid_size - 1) if self.grid_size > 1 else 1
+                height_spacing = (self.grid_size - 1) if self.grid_size > 1 else 1
+                size_from_width = 0.95 / ((self.grid_size + 0.5) * math.sqrt(3)) if self.grid_size > 0 else 0.1
+                size_from_height = 0.95 / (height_spacing * 1.5) if height_spacing > 0 else 0.1
+                size = min(size_from_width, size_from_height)
+                # Convert to screen space
+                hex_size = size * usable_width
+                draw_hex(self.screen, (x, y), hex_size, LIGHT_GRAY, filled=True)
+            elif self.tiling_type == "triangle":
+                # Triangles are subdivisions of hexagons
+                # Parse triangle ID: tri_hexcol_hexrow_triidx
+                parts = cell_id.split("_")
+                if len(parts) == 4:
+                    from multigrid.tilings.hex import OffsetCoord, offset_to_axial
+                    _, hex_col_str, hex_row_str, tri_idx_str = parts
+                    tri_idx = int(tri_idx_str)
+                    hex_col = int(hex_col_str)
+                    hex_row = int(hex_row_str)
+
+                    # Calculate hex size (same as HexTiling)
+                    width_spacing = (self.grid_size - 1) if self.grid_size > 1 else 1
+                    height_spacing = (self.grid_size - 1) if self.grid_size > 1 else 1
+                    size_from_width = 0.95 / ((self.grid_size + 0.5) * math.sqrt(3)) if self.grid_size > 0 else 0.1
+                    size_from_height = 0.95 / (height_spacing * 1.5) if height_spacing > 0 else 0.1
+                    hex_size = min(size_from_width, size_from_height)
+
+                    # Calculate hex center in normalized coordinates
+                    col_pos = hex_col * math.sqrt(3) * hex_size
+                    row_pos = hex_row * 1.5 * hex_size
+                    if hex_row % 2 == 1:
+                        col_pos += math.sqrt(3) / 2 * hex_size
+
+                    grid_width = (self.grid_size + 0.5) * math.sqrt(3) * hex_size
+                    grid_height = (self.grid_size - 0.5) * 1.5 * hex_size
+                    x_offset = (1.0 - grid_width) / 2
+                    y_offset = (1.0 - grid_height) / 2
+
+                    hex_center_x_norm = col_pos + x_offset
+                    hex_center_y_norm = row_pos + y_offset
+
+                    # Convert to screen coordinates
+                    hex_center_x = hex_center_x_norm * usable_width + margin
+                    hex_center_y = hex_center_y_norm * usable_height + margin
+                    hex_size_screen = hex_size * usable_width
+
+                    # Calculate the 3 vertices of this triangle
+                    angle_apex = math.pi / 2 - tri_idx * math.pi / 3
+                    angle_base1 = math.pi / 2 - ((tri_idx - 1) % 6) * math.pi / 3
+                    angle_base2 = math.pi / 2 - ((tri_idx + 1) % 6) * math.pi / 3
+
+                    # Apex vertex
+                    apex_x = hex_center_x + hex_size_screen * math.cos(angle_apex)
+                    apex_y = hex_center_y - hex_size_screen * math.sin(angle_apex)
+
+                    # Base vertices (adjacent hex vertices)
+                    base1_x = hex_center_x + hex_size_screen * math.cos(angle_base1)
+                    base1_y = hex_center_y - hex_size_screen * math.sin(angle_base1)
+
+                    base2_x = hex_center_x + hex_size_screen * math.cos(angle_base2)
+                    base2_y = hex_center_y - hex_size_screen * math.sin(angle_base2)
+
+                    vertices = [
+                        (apex_x, apex_y),
+                        (base1_x, base1_y),
+                        (base2_x, base2_y)
+                    ]
+
+                    pygame.draw.polygon(self.screen, LIGHT_GRAY, vertices)
+                    pygame.draw.polygon(self.screen, BLACK, vertices, 2)
+
+        # Calculate cell size for objects/agent
+        if self.tiling_type == "square":
+            cell_size = usable_width / self.grid_size
+        elif self.tiling_type == "hex":
+            # Use same calculation as hex rendering
+            width_spacing = (self.grid_size - 1) if self.grid_size > 1 else 1
+            height_spacing = (self.grid_size - 1) if self.grid_size > 1 else 1
+            size_from_width = 0.95 / ((self.grid_size + 0.5) * math.sqrt(3)) if self.grid_size > 0 else 0.1
+            size_from_height = 0.95 / (height_spacing * 1.5) if height_spacing > 0 else 0.1
+            size = min(size_from_width, size_from_height)
+            cell_size = size * usable_width
+        else:  # triangle
+            # Use triangle side length
+            side_length = 0.95 * 2 / (self.grid_size + 0.5)
+            cell_size = side_length * usable_width
+
+        # Draw objects
+        for obj in self.env.state.objects.values():
+            if obj.cell_id:
+                x_norm, y_norm = tiling.cell_to_canonical(obj.cell_id)
+                x = x_norm * usable_width + margin
+                y = y_norm * usable_height + margin
+
+                color_map = {'red': RED, 'green': GREEN, 'blue': BLUE, 'yellow': YELLOW}
+                draw_object(self.screen, (x, y), cell_size, color_map.get(obj.color, GRAY))
+
+        # Draw agent
+        agent_x_norm, agent_y_norm = tiling.cell_to_canonical(self.env.state.agent.cell_id)
+        agent_x = agent_x_norm * usable_width + margin
+        agent_y = agent_y_norm * usable_height + margin
+
+        # Calculate facing angle - match direction vectors
+        facing_dir = self.env.state.agent.get_facing_direction(tiling)
+        angle_map_square = {
+            "north": math.pi / 2,    # Up
+            "east": 0,               # Right
+            "south": -math.pi / 2,   # Down
+            "west": math.pi          # Left
+        }
+        angle_map_hex = {
+            "north": math.pi / 2,           # Up (0, -1)
+            "northeast": math.pi / 6,       # Up-right (1, -1)
+            "southeast": -math.pi / 6,      # Down-right (1, 0)
+            "south": -math.pi / 2,          # Down (0, 1)
+            "southwest": -5 * math.pi / 6,  # Down-left (-1, 1)
+            "northwest": 5 * math.pi / 6    # Up-left (-1, 0)
+        }
+        angle_map_triangle = {
+            "edge0": math.pi,        # Left
+            "edge1": 0,              # Right
+            "edge2": -math.pi / 2    # Down or Up depending on orientation
+        }
+
+        if self.tiling_type == "square":
+            facing_angle = angle_map_square.get(facing_dir, 0)
+        elif self.tiling_type == "hex":
+            facing_angle = angle_map_hex.get(facing_dir, 0)
+        else:
+            facing_angle = angle_map_triangle.get(facing_dir, 0)
+
+        draw_agent(self.screen, (agent_x, agent_y), cell_size, facing_angle)
+
+        # Draw held object indicator above agent (adjusts with facing)
+        if self.env.state.agent.holding:
+            held_obj = self.env.state.agent.holding
+            color_map = {'red': RED, 'green': GREEN, 'blue': BLUE, 'yellow': YELLOW}
+            color = color_map.get(held_obj.color, GRAY)
+            # Position held object in direction agent is facing
+            held_x = agent_x + cell_size * 0.6 * math.cos(facing_angle)
+            held_y = agent_y - cell_size * 0.6 * math.sin(facing_angle)
+            pygame.draw.circle(self.screen, color, (int(held_x), int(held_y)), int(cell_size * 0.3))
+            pygame.draw.circle(self.screen, BLACK, (int(held_x), int(held_y)), int(cell_size * 0.3), 2)
+
+    def draw_info(self):
+        """Draw information panel."""
+        info_y = self.height + 10
+
+        state = self.env.get_state_dict()
+
+        # Title
+        title = self.big_font.render(f"{self.tiling_type.upper()} GRID", True, BLACK)
+        self.screen.blit(title, (10, info_y))
+
+        # Info text
+        info_texts = [
+            f"Position: {state['agent']['cell_id']}",
+            f"Facing: {state['agent']['facing_direction']}",
+            f"Holding: {state['agent']['holding'] or 'Nothing'}",
+            f"Steps: {self.env.steps}"
+        ]
+
+        for i, text in enumerate(info_texts):
+            surface = self.font.render(text, True, BLACK)
+            self.screen.blit(surface, (10, info_y + 40 + i * 25))
+
+        # Controls
+        controls = [
+            "Arrow/WASD: Move | Q/E: Turn | SPACE: Pickup/Drop | P: Push",
+            "1: Square | 2: Hex | 3: Triangle | R: Reset | ESC: Quit"
+        ]
+
+        for i, text in enumerate(controls):
+            surface = self.font.render(text, True, DARK_GRAY)
+            self.screen.blit(surface, (self.width // 2 + 10, info_y + 40 + i * 25))
+
+    def run(self):
+        """Main game loop."""
+        running = True
+        while running:
+            running = self.handle_input()
+            self.draw_grid()
+            self.draw_info()
+            pygame.display.flip()
+            self.clock.tick(60)
+
+        pygame.quit()
+
+
+if __name__ == "__main__":
+    demo = InteractiveDemo(width=800, height=800)
+    demo.run()
diff --git a/src/v1_1/model_interface.py b/src/v1_1/model_interface.py
new file mode 100644
index 00000000..00d8c8a9
--- /dev/null
+++ b/src/v1_1/model_interface.py
@@ -0,0 +1,189 @@
+"""
+Standard Model Interface for MultiNet v1.1
+
+Defines the abstract interface all model adapters must implement,
+plus built-in baselines (random, file-based).
+"""
+
+from __future__ import annotations
+
+import json
+import time
+import numpy as np
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+
+@dataclass
+class ModelInput:
+    """Input to a model for action prediction."""
+    image: np.ndarray              # (H, W, 3) uint8 RGB observation
+    text_prompt: str               # Mission/task description
+    action_space: dict[int, str]   # {action_id: action_name}
+    step_number: int
+    max_steps: int
+    additional_context: str | None = None
+
+
+@dataclass
+class ModelOutput:
+    """Output from a model prediction."""
+    action: int                    # Predicted action ID
+    confidence: float | None = None
+    reasoning: str | None = None
+    raw_output: str | None = None
+
+
+class ModelInterface(ABC):
+    """
+    Abstract base class for all model adapters.
+
+    Implementations must provide:
+    - model_name property
+    - predict() method
+
+    Optional overrides:
+    - predict_batch() for batched inference
+    - setup() / teardown() for resource management
+    """
+
+    @property
+    @abstractmethod
+    def model_name(self) -> str:
+        """Unique identifier for this model."""
+        ...
+
+    @property
+    def supports_batched(self) -> bool:
+        """Whether this model supports batched prediction."""
+        return False
+
+    @abstractmethod
+    def predict(self, input: ModelInput) -> ModelOutput:
+        """
+        Predict the next action given an observation.
+
+        Args:
+            input: ModelInput with image, text prompt, and action space
+
+        Returns:
+            ModelOutput with predicted action
+        """
+        ...
+
+    def predict_batch(self, inputs: list[ModelInput]) -> list[ModelOutput]:
+        """
+        Predict actions for a batch of observations.
+
+        Default implementation loops over inputs. Override for efficiency.
+        """
+        return [self.predict(inp) for inp in inputs]
+
+    def setup(self, device: str = "cpu") -> None:
+        """
+        Initialize model resources (load weights, etc.).
+
+        Called once before evaluation begins. Override if needed.
+        """
+        pass
+
+    def teardown(self) -> None:
+        """
+        Release model resources.
+
+        Called after evaluation completes. Override if needed.
+        """
+        pass
+
+
+class RandomModelInterface(ModelInterface):
+    """Built-in random baseline that selects actions uniformly at random."""
+
+    def __init__(self, seed: int = 42):
+        self._rng = np.random.RandomState(seed)
+
+    @property
+    def model_name(self) -> str:
+        return "random"
+
+    def predict(self, input: ModelInput) -> ModelOutput:
+        action_ids = list(input.action_space.keys())
+        action = self._rng.choice(action_ids)
+        return ModelOutput(
+            action=int(action),
+            confidence=1.0 / len(action_ids),
+            reasoning="Random selection",
+        )
+
+
+class FileBasedModelInterface(ModelInterface):
+    """
+    File-based model protocol for external process integration.
+
+    Writes observations to {work_dir}/input/step_N.json + step_N.png,
+    waits for {work_dir}/output/step_N.json with {"action": int}.
+    This enables external testers to use any language/framework.
+    """
+
+    def __init__(self, work_dir: str, timeout: float = 60.0, poll_interval: float = 0.1):
+        self.work_dir = Path(work_dir)
+        self.timeout = timeout
+        self.poll_interval = poll_interval
+        self.input_dir = self.work_dir / "input"
+        self.output_dir = self.work_dir / "output"
+
+    @property
+    def model_name(self) -> str:
+        return "file_based"
+
+    def setup(self, device: str = "cpu") -> None:
+        self.input_dir.mkdir(parents=True, exist_ok=True)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+    def predict(self, input: ModelInput) -> ModelOutput:
+        step = input.step_number
+
+        # Write input image as PNG
+        from PIL import Image
+        img = Image.fromarray(input.image)
+        img.save(self.input_dir / f"step_{step}.png")
+
+        # Write input metadata as JSON
+        input_data = {
+            "step_number": step,
+            "max_steps": input.max_steps,
+            "text_prompt": input.text_prompt,
+            "action_space": {str(k): v for k, v in input.action_space.items()},
+            "image_path": f"step_{step}.png",
+        }
+        if input.additional_context:
+            input_data["additional_context"] = input.additional_context
+
+        with open(self.input_dir / f"step_{step}.json", "w") as f:
+            json.dump(input_data, f, indent=2)
+
+        # Wait for output
+        output_path = self.output_dir / f"step_{step}.json"
+        start_time = time.time()
+        while not output_path.exists():
+            if time.time() - start_time > self.timeout:
+                raise TimeoutError(
+                    f"Timed out waiting for {output_path} after {self.timeout}s"
+                )
+            time.sleep(self.poll_interval)
+
+        # Read output
+        with open(output_path) as f:
+            result = json.load(f)
+
+        return ModelOutput(
+            action=int(result["action"]),
+            confidence=result.get("confidence"),
+            reasoning=result.get("reasoning"),
+            raw_output=json.dumps(result),
+        )
+
+    def teardown(self) -> None:
+        pass
diff --git a/src/v1_1/multigrid/__init__.py b/src/v1_1/multigrid/__init__.py
new file mode 100644
index 00000000..2c9360b8
--- /dev/null
+++ b/src/v1_1/multigrid/__init__.py
@@ -0,0 +1,70 @@
+# multigrid/__init__.py
+
+"""
+MultiGrid: Topology-Agnostic Gridworld Environments
+
+Provides gridworld environments with pluggable tiling systems:
+- Square: Traditional 4-connected grid (up/down/left/right)
+- Hexagonal: 6-connected pointy-top hexagons
+- Triangle: 3-connected triangles within hexagons
+
+Usage:
+    from multigrid.env import MultiGridEnv, TilingRegistry
+
+    # Create environment with triangle tiling
+    env = MultiGridEnv(task_spec=spec, tiling="triangle")
+    obs, info = env.reset()
+    obs, reward, done, truncated, info = env.step(action)
+"""
+
+from .core import Cell, TilingGraph
+from .base import Tiling
+from .tilings import SquareTiling, HexTiling, TriangleTiling
+from .env import MultiGridEnv, TilingRegistry
+from .agent import AgentState, Action
+from .world import WorldState, execute_action
+from .goals import (
+    Goal,
+    ReachPositionGoal,
+    ReachCanonicalPositionGoal,
+    CollectAllGoal,
+    PushBlockToGoal,
+    SurviveStepsGoal,
+    CompositeGoal,
+    AnyGoal,
+    create_goal_from_spec,
+)
+from .rendering import render_multigrid, MinimalRenderer
+
+__all__ = [
+    # Core
+    'Cell',
+    'TilingGraph',
+    'Tiling',
+    # Tilings
+    'SquareTiling',
+    'HexTiling',
+    'TriangleTiling',
+    # Environment
+    'MultiGridEnv',
+    'TilingRegistry',
+    # Agent
+    'AgentState',
+    'Action',
+    # World
+    'WorldState',
+    'execute_action',
+    # Goals
+    'Goal',
+    'ReachPositionGoal',
+    'ReachCanonicalPositionGoal',
+    'CollectAllGoal',
+    'PushBlockToGoal',
+    'SurviveStepsGoal',
+    'CompositeGoal',
+    'AnyGoal',
+    'create_goal_from_spec',
+    # Rendering
+    'render_multigrid',
+    'MinimalRenderer',
+]
diff --git a/src/v1_1/multigrid/agent.py b/src/v1_1/multigrid/agent.py
new file mode 100644
index 00000000..64118067
--- /dev/null
+++ b/src/v1_1/multigrid/agent.py
@@ -0,0 +1,44 @@
+# multigrid/agent.py
+
+from dataclasses import dataclass
+from enum import IntEnum
+from typing import Optional
+from .objects.base import WorldObj
+from .base import Tiling
+
+
+class Action(IntEnum):
+    """
+    Discrete action space for MultiGrid.
+
+    Actions 0-6 map to MiniGrid's standard 7-action space for compatibility.
+    Action 7 (PUSH) and 8 (TOGGLE) extend beyond MiniGrid's standard set.
+    """
+    # Movement
+    FORWARD = 0       # Move in facing direction
+    BACKWARD = 1      # Move opposite to facing direction
+
+    # Rotation
+    TURN_LEFT = 2     # Rotate facing counter-clockwise
+    TURN_RIGHT = 3    # Rotate facing clockwise
+
+    # Object interaction
+    PICKUP = 4        # Pick up object in facing cell
+    DROP = 5          # Drop held object in facing cell
+    TOGGLE = 6        # Interact: unlock door (with key), activate switch
+    PUSH = 7          # Push object in facing direction
+
+    # No-op
+    WAIT = 8
+
+
+@dataclass
+class AgentState:
+    """Complete agent state."""
+    cell_id: str                       # Current cell
+    facing: int                        # Direction index (0 to num_directions-1)
+    holding: Optional[WorldObj] = None # Picked up object
+
+    def get_facing_direction(self, tiling: Tiling) -> str:
+        """Get direction label agent is facing."""
+        return tiling.directions[self.facing]
diff --git a/src/v1_1/multigrid/base.py b/src/v1_1/multigrid/base.py
new file mode 100644
index 00000000..3c7bc1e2
--- /dev/null
+++ b/src/v1_1/multigrid/base.py
@@ -0,0 +1,56 @@
+# multigrid/base.py
+
+from abc import ABC, abstractmethod
+from typing import Optional
+from .core import Cell, TilingGraph
+
+
+class Tiling(ABC):
+    """Abstract base for all tiling types."""
+
+    def __init__(self):
+        self.width = 0
+        self.height = 0
+        self.cells: dict[str, Cell] = {}
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Tiling identifier (e.g., 'square', 'hex', 'triangle')."""
+        pass
+
+    @property
+    @abstractmethod
+    def directions(self) -> list[str]:
+        """List of valid movement directions."""
+        pass
+
+    @abstractmethod
+    def generate_graph(self, width: int, height: int, seed: int) -> dict[str, Cell]:
+        """Generate the adjacency graph for a world of given size."""
+        pass
+
+    @abstractmethod
+    def canonical_to_cell(self, x: float, y: float) -> str:
+        """Convert normalized [0,1] coordinates to cell ID."""
+        pass
+
+    @abstractmethod
+    def cell_to_canonical(self, cell_id: str) -> tuple[float, float]:
+        """Convert cell ID to normalized [0,1] coordinates."""
+        pass
+
+    @abstractmethod
+    def get_neighbor(self, cell_id: str, direction: str) -> Optional[str]:
+        """Get neighbor cell ID in given direction, or None if blocked/boundary."""
+        pass
+
+    @abstractmethod
+    def distance(self, cell_a: str, cell_b: str) -> int:
+        """Compute graph distance (hops) between two cells."""
+        pass
+
+    def render_cell(self, cell: Cell, renderer) -> None:
+        """Render a single cell using the provided renderer."""
+        # Default implementation - can be overridden
+        pass
diff --git a/src/v1_1/multigrid/core.py b/src/v1_1/multigrid/core.py
new file mode 100644
index 00000000..81fad829
--- /dev/null
+++ b/src/v1_1/multigrid/core.py
@@ -0,0 +1,24 @@
+# multigrid/core.py
+
+from dataclasses import dataclass, field
+from typing import Any, Optional
+
+
+@dataclass
+class Cell:
+    """A single cell in the grid."""
+    id: str                                    # Unique identifier (e.g., "cell_0_0")
+    neighbors: dict[str, str] = field(default_factory=dict)  # direction -> neighbor_cell_id
+    contents: Optional[Any] = None             # Object occupying this cell
+    position_hint: tuple[float, float] = (0.0, 0.0)  # Rendering position (normalized 0-1)
+    tiling_coords: Any = None                  # Tiling-specific coordinates (for math)
+    row: int = 0                               # Grid row (for offset/storage)
+    col: int = 0                               # Grid column (for offset/storage)
+
+
+@dataclass
+class TilingGraph:
+    """Adjacency graph representing the world topology."""
+    cells: dict[str, Cell] = field(default_factory=dict)  # cell_id -> Cell
+    boundary_cells: set[str] = field(default_factory=set)  # IDs of cells at world boundary
+    directions: list[str] = field(default_factory=list)    # Valid direction labels for this tiling
diff --git a/src/v1_1/multigrid/demo.py b/src/v1_1/multigrid/demo.py
new file mode 100644
index 00000000..e17a798f
--- /dev/null
+++ b/src/v1_1/multigrid/demo.py
@@ -0,0 +1,726 @@
+#!/usr/bin/env python3
+"""
+MultiGrid Backend Demo
+
+Demonstrates the custom MultiGrid implementation with:
+- Multiple tiling types (square, hex, triangle)
+- All object types (keys, doors, switches, gates, hazards, teleporters, zones)
+- Mechanism interactions
+
+Usage:
+    python demo.py              # Run all demos
+    python demo.py --visual     # Save PNG images of each demo
+    python demo.py --demo 3     # Run specific demo
+    python demo.py --play       # Interactive play mode
+    python demo.py --play --tiling hex  # Play with hex grid
+"""
+
+import sys
+import argparse
+from pathlib import Path
+import numpy as np
+
+# Ensure imports work
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from multigrid.env import MultiGridEnv, TilingRegistry
+from multigrid.agent import Action
+from multigrid.rendering import render_multigrid
+
+
+def save_image(frame: np.ndarray, path: str):
+    """Save frame as PNG image."""
+    try:
+        from PIL import Image
+        img = Image.fromarray(frame)
+        img.save(path)
+        print(f"  Saved: {path}")
+    except ImportError:
+        print("  PIL not available, skipping image save")
+
+
+def interactive_play(tiling: str = "square"):
+    """
+    Interactive play mode - control the agent with keyboard.
+
+    Controls:
+        Arrow Keys: Move/Turn
+            Up: Move forward
+            Down: Move backward
+            Left: Turn left
+            Right: Turn right
+        Space: Pickup
+        D: Drop
+        T or Enter: Toggle (open door, activate switch)
+        P: Push
+        R: Reset episode
+        Q or Escape: Quit
+    """
+    import pygame
+
+    print("\n" + "=" * 60)
+    print("Interactive Play Mode")
+    print("=" * 60)
+    print(f"\nTiling: {tiling}")
+    print(f"\nControls:")
+    print("  Arrow Up    : Move forward")
+    print("  Arrow Down  : Move backward")
+    print("  Arrow Left  : Turn left")
+    print("  Arrow Right : Turn right")
+    print("  Space       : Pickup")
+    print("  D           : Drop")
+    print("  T / Enter   : Toggle (doors, switches)")
+    print("  P           : Push")
+    print("  R           : Reset")
+    print("  Q / Escape  : Quit")
+    print("\n" + "-" * 60)
+
+    # Create a playground task with various objects
+    task_spec = {
+        "task_id": "interactive_play",
+        "seed": 42,
+        "tiling": {"type": tiling, "grid_size": {"width": 8, "height": 8}},
+        "rules": {"key_consumption": True},
+        "scene": {
+            "agent": {"position": {"x": 0.15, "y": 0.15}, "facing": 1},
+            "objects": [
+                # Key and door
+                {"id": "key_blue", "type": "key", "color": "blue",
+                 "position": {"x": 0.35, "y": 0.15}},
+                {"id": "door_blue", "type": "door", "color": "blue",
+                 "position": {"x": 0.55, "y": 0.15}, "is_locked": True},
+
+                # Switch and gate
+                {"id": "switch_1", "type": "switch", "color": "yellow",
+                 "position": {"x": 0.15, "y": 0.45}, "switch_type": "toggle",
+                 "controls": ["gate_1"], "initial_state": False},
+                {"id": "gate_1", "type": "gate", "color": "yellow",
+                 "position": {"x": 0.55, "y": 0.45}, "is_open": False,
+                 "controlled_by": ["switch_1"]},
+
+                # Pushable box
+                {"id": "box_1", "type": "movable", "color": "green",
+                 "position": {"x": 0.35, "y": 0.65}},
+
+                # Hazard
+                {"id": "lava_1", "type": "hazard", "color": "red",
+                 "position": {"x": 0.75, "y": 0.75}, "hazard_type": "lava"},
+
+                # Goal zone
+                {"id": "goal_zone", "type": "zone", "color": "cyan",
+                 "position": {"x": 0.85, "y": 0.15}},
+            ]
+        },
+        "goal": {"type": "reach_position", "target": {"x": 0.85, "y": 0.15}},
+        "limits": {"max_steps": 200}
+    }
+
+    env = MultiGridEnv(task_spec, tiling=tiling, render_mode="rgb_array")
+    obs, info = env.reset()
+
+    # Initialize pygame
+    pygame.init()
+
+    # Scale up for visibility
+    scale = 2
+    display_size = (obs.shape[1] * scale, obs.shape[0] * scale)
+    screen = pygame.display.set_mode(display_size)
+    pygame.display.set_caption(f"MultiGrid ({tiling}): Interactive Play")
+
+    # Key mapping
+    key_to_action = {
+        pygame.K_UP: Action.FORWARD,
+        pygame.K_DOWN: Action.BACKWARD,
+        pygame.K_LEFT: Action.TURN_LEFT,
+        pygame.K_RIGHT: Action.TURN_RIGHT,
+        pygame.K_SPACE: Action.PICKUP,
+        pygame.K_d: Action.DROP,
+        pygame.K_t: Action.TOGGLE,
+        pygame.K_RETURN: Action.TOGGLE,
+        pygame.K_p: Action.PUSH,
+    }
+
+    clock = pygame.time.Clock()
+    running = True
+    step_count = 0
+
+    def render_frame():
+        frame = env.render()
+        surf = pygame.surfarray.make_surface(frame.swapaxes(0, 1))
+        surf = pygame.transform.scale(surf, display_size)
+        screen.blit(surf, (0, 0))
+        pygame.display.flip()
+
+    def print_status():
+        agent = env.state.agent
+        holding = agent.holding.id if agent.holding else "nothing"
+        facing = agent.get_facing_direction(env.tiling)
+        print(f"  Step {step_count}: cell={agent.cell_id}, facing={facing}, holding={holding}")
+
+    render_frame()
+    print(f"\nStarting at {env.state.agent.cell_id}")
+    print(f"Goal: reach the cyan zone at top-right")
+
+    while running:
+        for event in pygame.event.get():
+            if event.type == pygame.QUIT:
+                running = False
+            elif event.type == pygame.KEYDOWN:
+                if event.key in (pygame.K_q, pygame.K_ESCAPE):
+                    running = False
+                elif event.key == pygame.K_r:
+                    # Reset
+                    obs, info = env.reset()
+                    step_count = 0
+                    render_frame()
+                    print("\n--- Episode Reset ---")
+                    print(f"Starting at {env.state.agent.cell_id}")
+                elif event.key in key_to_action:
+                    action = key_to_action[event.key]
+                    obs, reward, terminated, truncated, info = env.step(action.value)
+                    step_count += 1
+                    render_frame()
+                    print_status()
+
+                    # Show action effects
+                    if info.get("action_effect"):
+                        print(f"    -> {info['action_effect']}")
+                    if info.get("invalid_action"):
+                        print(f"    -> blocked")
+
+                    if info.get("hazard_hit"):
+                        print("\n*** STEPPED IN LAVA! ***")
+                        print("Press R to reset or Q to quit")
+                    elif terminated:
+                        print("\n*** GOAL REACHED! ***")
+                        print(f"Completed in {step_count} steps")
+                        print("Press R to reset or Q to quit")
+                    elif truncated:
+                        print("\n*** TIME LIMIT REACHED ***")
+                        print("Press R to reset or Q to quit")
+
+        clock.tick(30)
+
+    pygame.quit()
+    print("\n✓ Interactive session ended")
+
+
+def demo_tiling_types(save_images: bool = False):
+    """Demonstrate all three tiling types."""
+    print("\n" + "=" * 60)
+    print("Demo 1: Tiling Types (Square, Hex, Triangle)")
+    print("=" * 60)
+
+    output_dir = Path(__file__).parent / "demo_output"
+    if save_images:
+        output_dir.mkdir(exist_ok=True)
+
+    for tiling_name in ["square", "hex", "triangle"]:
+        print(f"\n--- {tiling_name.upper()} Tiling ---")
+
+        task_spec = {
+            "task_id": f"demo_{tiling_name}",
+            "seed": 42,
+            "tiling": {
+                "type": tiling_name,
+                "grid_size": {"width": 5, "height": 5}
+            },
+            "scene": {
+                "agent": {"position": {"x": 0.3, "y": 0.3}, "facing": 0},
+                "objects": [
+                    {"id": "box_1", "type": "movable", "color": "blue",
+                     "position": {"x": 0.5, "y": 0.5}},
+                    {"id": "box_2", "type": "movable", "color": "red",
+                     "position": {"x": 0.7, "y": 0.3}},
+                ]
+            },
+            "goal": {"type": "reach_position", "target": {"x": 0.8, "y": 0.8}},
+            "limits": {"max_steps": 50}
+        }
+
+        env = MultiGridEnv(task_spec, tiling=tiling_name, render_mode="rgb_array")
+        obs, info = env.reset()
+
+        tiling = env.tiling
+        print(f"  Cells: {len(tiling.cells)}")
+        print(f"  Directions: {len(tiling.directions)} ({', '.join(tiling.directions)})")
+        print(f"  Agent at: {env.state.agent.cell_id}")
+        print(f"  Observation shape: {obs.shape}")
+
+        if save_images:
+            frame = env.render()
+            save_image(frame, str(output_dir / f"demo1_{tiling_name}.png"))
+
+    print("\n✓ Tiling types demo complete")
+
+
+def demo_all_objects(save_images: bool = False):
+    """Demonstrate all object types."""
+    print("\n" + "=" * 60)
+    print("Demo 2: All Object Types")
+    print("=" * 60)
+
+    task_spec = {
+        "task_id": "demo_objects",
+        "seed": 42,
+        "tiling": {"type": "square", "grid_size": {"width": 8, "height": 8}},
+        "rules": {"key_consumption": True},
+        "scene": {
+            "agent": {"position": {"x": 0.1, "y": 0.1}, "facing": 1},
+            "objects": [
+                # Row 1: Key and Door
+                {"id": "key_blue", "type": "key", "color": "blue",
+                 "position": {"x": 0.25, "y": 0.15}},
+                {"id": "door_blue", "type": "door", "color": "blue",
+                 "position": {"x": 0.4, "y": 0.15}, "is_locked": True},
+
+                # Row 2: Switch and Gate
+                {"id": "switch_1", "type": "switch", "color": "yellow",
+                 "position": {"x": 0.25, "y": 0.35}, "switch_type": "toggle",
+                 "controls": ["gate_1"], "initial_state": False},
+                {"id": "gate_1", "type": "gate", "color": "yellow",
+                 "position": {"x": 0.5, "y": 0.35}, "is_open": False},
+
+                # Row 3: Movable and Wall
+                {"id": "box_1", "type": "movable", "color": "green",
+                 "position": {"x": 0.25, "y": 0.55}},
+                {"id": "wall_1", "type": "wall", "color": "grey",
+                 "position": {"x": 0.5, "y": 0.55}},
+
+                # Row 4: Hazard and Zone
+                {"id": "lava_1", "type": "hazard", "color": "red",
+                 "position": {"x": 0.25, "y": 0.75}, "hazard_type": "lava"},
+                {"id": "zone_1", "type": "zone", "color": "cyan",
+                 "position": {"x": 0.5, "y": 0.75}},
+
+                # Teleporter pair
+                {"id": "tele_1", "type": "teleporter", "color": "purple",
+                 "position": {"x": 0.75, "y": 0.25}, "linked_to": "tele_2"},
+                {"id": "tele_2", "type": "teleporter", "color": "purple",
+                 "position": {"x": 0.75, "y": 0.75}, "linked_to": "tele_1"},
+            ]
+        },
+        "goal": {"type": "reach_position", "target": {"x": 0.9, "y": 0.9}},
+        "limits": {"max_steps": 100}
+    }
+
+    env = MultiGridEnv(task_spec, tiling="square", render_mode="rgb_array")
+    env.reset()
+
+    print("\nObjects in scene:")
+    for obj_id, obj in env.state.objects.items():
+        details = f"at {obj.cell_id}"
+        if hasattr(obj, "is_locked"):
+            details += f", locked={obj.is_locked}"
+        if hasattr(obj, "is_open"):
+            details += f", open={obj.is_open}"
+        if hasattr(obj, "is_active"):
+            details += f", active={obj.is_active}"
+        if hasattr(obj, "linked_to"):
+            details += f", linked_to={obj.linked_to}"
+        print(f"  {obj_id} ({obj.obj_type}, {obj.color}): {details}")
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        frame = env.render()
+        save_image(frame, str(output_dir / "demo2_all_objects.png"))
+
+    print("\n✓ All objects demo complete")
+
+
+def demo_key_door_mechanism(save_images: bool = False):
+    """Demonstrate key + door interaction."""
+    print("\n" + "=" * 60)
+    print("Demo 3: Key + Door Mechanism")
+    print("=" * 60)
+
+    # Grid layout (6 wide):
+    # sq_1_0 (agent) -> sq_1_1 (key) -> sq_1_2 -> sq_1_3 (door) -> sq_1_4 -> sq_1_5 (goal)
+    task_spec = {
+        "task_id": "demo_key_door",
+        "seed": 42,
+        "tiling": {"type": "square", "grid_size": {"width": 6, "height": 3}},
+        "rules": {"key_consumption": True},
+        "scene": {
+            "agent": {"position": {"x": 0.08, "y": 0.5}, "facing": 1},  # sq_1_0, face east
+            "objects": [
+                {"id": "key_blue", "type": "key", "color": "blue",
+                 "position": {"x": 0.25, "y": 0.5}},  # sq_1_1
+                {"id": "door_blue", "type": "door", "color": "blue",
+                 "position": {"x": 0.58, "y": 0.5}, "is_locked": True},  # sq_1_3
+            ]
+        },
+        "goal": {"type": "reach_position", "target": {"x": 0.92, "y": 0.5}},  # sq_1_5
+        "limits": {"max_steps": 20}
+    }
+
+    env = MultiGridEnv(task_spec, tiling="square", render_mode="rgb_array")
+    env.reset()
+
+    door = env.state.objects["door_blue"]
+
+    print(f"\nInitial state:")
+    print(f"  Agent: {env.state.agent.cell_id}, facing: {env.state.agent.get_facing_direction(env.tiling)}")
+    print(f"  Key: {env.state.objects['key_blue'].cell_id}")
+    print(f"  Door: {door.cell_id}, locked={door.is_locked}, open={door.is_open}")
+
+    # Execute solution: agent at sq_1_0, key at sq_1_1, door at sq_1_3
+    actions = [
+        (Action.FORWARD, "Move to key (sq_1_1)"),
+        (Action.PICKUP, "Pick up key"),
+        (Action.FORWARD, "Move to sq_1_2"),
+        (Action.FORWARD, "Move to door (sq_1_3) - blocked"),
+        (Action.TOGGLE, "Unlock door with key"),
+        (Action.FORWARD, "Move through door (sq_1_3)"),
+        (Action.FORWARD, "Move to sq_1_4"),
+        (Action.FORWARD, "Move to goal (sq_1_5)"),
+    ]
+
+    print("\nExecuting actions:")
+    for action, desc in actions:
+        obs, reward, terminated, truncated, info = env.step(action.value)
+        holding = env.state.agent.holding.id if env.state.agent.holding else None
+        status = f"pos={env.state.agent.cell_id}, holding={holding}"
+        if info.get("action_effect"):
+            status += f", effect={info['action_effect']}"
+        if info.get("invalid_action"):
+            status += " [BLOCKED]"
+        print(f"  {desc}: {status}")
+
+        if terminated:
+            print("  >>> GOAL REACHED!")
+            break
+
+    print(f"\nFinal state:")
+    print(f"  Door: locked={door.is_locked}, open={door.is_open}")
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        frame = env.render()
+        save_image(frame, str(output_dir / "demo3_key_door.png"))
+
+    print("\n✓ Key + door demo complete")
+
+
+def demo_switch_gate_mechanism(save_images: bool = False):
+    """Demonstrate switch + gate interaction."""
+    print("\n" + "=" * 60)
+    print("Demo 4: Switch + Gate Mechanism")
+    print("=" * 60)
+
+    # Grid layout (6 wide):
+    # sq_1_0 (agent) -> sq_1_1 (switch) -> sq_1_2 -> sq_1_3 (gate) -> sq_1_4 -> sq_1_5 (goal)
+    task_spec = {
+        "task_id": "demo_switch_gate",
+        "seed": 42,
+        "tiling": {"type": "square", "grid_size": {"width": 6, "height": 3}},
+        "scene": {
+            "agent": {"position": {"x": 0.08, "y": 0.5}, "facing": 1},  # sq_1_0
+            "objects": [
+                {"id": "switch_1", "type": "switch", "color": "yellow",
+                 "position": {"x": 0.25, "y": 0.5}, "switch_type": "toggle",  # sq_1_1
+                 "controls": ["gate_1"], "initial_state": False},
+                {"id": "gate_1", "type": "gate", "color": "yellow",
+                 "position": {"x": 0.58, "y": 0.5}, "is_open": False,  # sq_1_3
+                 "controlled_by": ["switch_1"]},
+            ]
+        },
+        "goal": {"type": "reach_position", "target": {"x": 0.92, "y": 0.5}},  # sq_1_5
+        "limits": {"max_steps": 20}
+    }
+
+    env = MultiGridEnv(task_spec, tiling="square", render_mode="rgb_array")
+    env.reset()
+
+    switch = env.state.objects["switch_1"]
+    gate = env.state.objects["gate_1"]
+
+    print(f"\nInitial state:")
+    print(f"  Agent: {env.state.agent.cell_id}")
+    print(f"  Switch: {switch.cell_id}, active={switch.is_active}")
+    print(f"  Gate: {gate.cell_id}, open={gate.is_open}")
+
+    actions = [
+        (Action.FORWARD, "Move to switch (sq_1_1)"),
+        (Action.TOGGLE, "Activate switch"),
+        (Action.FORWARD, "Move to sq_1_2"),
+        (Action.FORWARD, "Move through gate (sq_1_3)"),
+        (Action.FORWARD, "Move to sq_1_4"),
+        (Action.FORWARD, "Move to goal (sq_1_5)"),
+    ]
+
+    print("\nExecuting actions:")
+    for action, desc in actions:
+        obs, reward, terminated, truncated, info = env.step(action.value)
+        status = f"pos={env.state.agent.cell_id}, switch={switch.is_active}, gate={gate.is_open}"
+        if info.get("action_effect"):
+            status += f", effect={info['action_effect']}"
+        print(f"  {desc}: {status}")
+
+        if terminated:
+            print("  >>> GOAL REACHED!")
+            break
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        frame = env.render()
+        save_image(frame, str(output_dir / "demo4_switch_gate.png"))
+
+    print("\n✓ Switch + gate demo complete")
+
+
+def demo_hazard(save_images: bool = False):
+    """Demonstrate hazard termination."""
+    print("\n" + "=" * 60)
+    print("Demo 5: Hazard (Lava)")
+    print("=" * 60)
+
+    task_spec = {
+        "task_id": "demo_hazard",
+        "seed": 42,
+        "tiling": {"type": "square", "grid_size": {"width": 4, "height": 3}},
+        "scene": {
+            "agent": {"position": {"x": 0.15, "y": 0.5}, "facing": 1},
+            "objects": [
+                {"id": "lava_1", "type": "hazard", "color": "red",
+                 "position": {"x": 0.5, "y": 0.5}, "hazard_type": "lava"},
+            ]
+        },
+        "goal": {"type": "reach_position", "target": {"x": 0.85, "y": 0.5}},
+        "limits": {"max_steps": 10}
+    }
+
+    env = MultiGridEnv(task_spec, tiling="square", render_mode="rgb_array")
+    env.reset()
+
+    print(f"\nAgent starting at {env.state.agent.cell_id}")
+    print(f"Lava at {env.state.objects['lava_1'].cell_id}")
+
+    print("\nMoving toward lava...")
+    obs, reward, terminated, truncated, info = env.step(Action.FORWARD.value)
+    print(f"  Step 1: pos={env.state.agent.cell_id}")
+
+    obs, reward, terminated, truncated, info = env.step(Action.FORWARD.value)
+    print(f"  Step 2: pos={env.state.agent.cell_id}")
+    print(f"  Hazard hit: {info.get('hazard_hit', False)}")
+    print(f"  Terminated: {terminated}")
+
+    if terminated:
+        print("\n  >>> AGENT DIED IN LAVA!")
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        frame = env.render()
+        save_image(frame, str(output_dir / "demo5_hazard.png"))
+
+    print("\n✓ Hazard demo complete")
+
+
+def demo_push_action(save_images: bool = False):
+    """Demonstrate push action."""
+    print("\n" + "=" * 60)
+    print("Demo 6: Push Action")
+    print("=" * 60)
+
+    task_spec = {
+        "task_id": "demo_push",
+        "seed": 42,
+        "tiling": {"type": "square", "grid_size": {"width": 5, "height": 3}},
+        "scene": {
+            "agent": {"position": {"x": 0.1, "y": 0.5}, "facing": 1},
+            "objects": [
+                {"id": "box_1", "type": "movable", "color": "green",
+                 "position": {"x": 0.3, "y": 0.5}},
+            ]
+        },
+        "goal": {"type": "reach_position", "target": {"x": 0.9, "y": 0.5}},
+        "limits": {"max_steps": 20}
+    }
+
+    env = MultiGridEnv(task_spec, tiling="square", render_mode="rgb_array")
+    env.reset()
+
+    box = env.state.objects["box_1"]
+
+    print(f"\nInitial: Agent at {env.state.agent.cell_id}, Box at {box.cell_id}")
+
+    # Push the box
+    obs, reward, terminated, truncated, info = env.step(Action.PUSH.value)
+    print(f"\nAfter PUSH:")
+    print(f"  Agent at {env.state.agent.cell_id}")
+    print(f"  Box at {box.cell_id}")
+    print(f"  Effect: {info.get('action_effect')}")
+
+    # Push again
+    obs, reward, terminated, truncated, info = env.step(Action.FORWARD.value)
+    obs, reward, terminated, truncated, info = env.step(Action.PUSH.value)
+    print(f"\nAfter move + PUSH:")
+    print(f"  Agent at {env.state.agent.cell_id}")
+    print(f"  Box at {box.cell_id}")
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        frame = env.render()
+        save_image(frame, str(output_dir / "demo6_push.png"))
+
+    print("\n✓ Push demo complete")
+
+
+def demo_triangle_navigation(save_images: bool = False):
+    """Demonstrate navigation in triangle tiling."""
+    print("\n" + "=" * 60)
+    print("Demo 7: Triangle Tiling Navigation")
+    print("=" * 60)
+
+    task_spec = {
+        "task_id": "demo_triangle_nav",
+        "seed": 42,
+        "tiling": {"type": "triangle", "grid_size": {"width": 4, "height": 4}},
+        "scene": {
+            "agent": {"position": {"x": 0.3, "y": 0.3}, "facing": 0},
+            "objects": [
+                {"id": "goal_marker", "type": "zone", "color": "green",
+                 "position": {"x": 0.7, "y": 0.7}},
+            ]
+        },
+        "goal": {"type": "reach_position", "target": {"x": 0.7, "y": 0.7}},
+        "limits": {"max_steps": 30}
+    }
+
+    env = MultiGridEnv(task_spec, tiling="triangle", render_mode="rgb_array")
+    env.reset()
+
+    print(f"\nTriangle tiling:")
+    print(f"  Total cells: {len(env.tiling.cells)}")
+    print(f"  Directions: {env.tiling.directions}")
+    print(f"  Agent at: {env.state.agent.cell_id}")
+    print(f"  Agent facing: {env.state.agent.get_facing_direction(env.tiling)}")
+
+    print("\nNavigating (10 random moves):")
+    import random
+    for i in range(10):
+        action = random.choice([Action.FORWARD, Action.TURN_LEFT, Action.TURN_RIGHT])
+        obs, reward, terminated, truncated, info = env.step(action.value)
+        facing = env.state.agent.get_facing_direction(env.tiling)
+        print(f"  {i+1}. {action.name}: cell={env.state.agent.cell_id}, facing={facing}")
+
+        if terminated:
+            print("  >>> GOAL REACHED!")
+            break
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        frame = env.render()
+        save_image(frame, str(output_dir / "demo7_triangle.png"))
+
+    print("\n✓ Triangle navigation demo complete")
+
+
+def demo_hex_with_mechanisms(save_images: bool = False):
+    """Demonstrate hex tiling with mechanisms."""
+    print("\n" + "=" * 60)
+    print("Demo 8: Hex Tiling with Mechanisms")
+    print("=" * 60)
+
+    task_spec = {
+        "task_id": "demo_hex_mechanisms",
+        "seed": 42,
+        "tiling": {"type": "hex", "grid_size": {"width": 4, "height": 4}},
+        "rules": {"key_consumption": True},
+        "scene": {
+            "agent": {"position": {"x": 0.2, "y": 0.2}, "facing": 1},
+            "objects": [
+                {"id": "key_red", "type": "key", "color": "red",
+                 "position": {"x": 0.4, "y": 0.3}},
+                {"id": "door_red", "type": "door", "color": "red",
+                 "position": {"x": 0.6, "y": 0.5}, "is_locked": True},
+                {"id": "box_1", "type": "movable", "color": "blue",
+                 "position": {"x": 0.3, "y": 0.6}},
+            ]
+        },
+        "goal": {"type": "reach_position", "target": {"x": 0.8, "y": 0.8}},
+        "limits": {"max_steps": 50}
+    }
+
+    env = MultiGridEnv(task_spec, tiling="hex", render_mode="rgb_array")
+    env.reset()
+
+    print(f"\nHex tiling:")
+    print(f"  Total cells: {len(env.tiling.cells)}")
+    print(f"  Directions: {env.tiling.directions}")
+
+    print("\nObjects:")
+    for obj_id, obj in env.state.objects.items():
+        print(f"  {obj_id} ({obj.obj_type}): {obj.cell_id}")
+
+    if save_images:
+        output_dir = Path(__file__).parent / "demo_output"
+        output_dir.mkdir(exist_ok=True)
+        frame = env.render()
+        save_image(frame, str(output_dir / "demo8_hex_mechanisms.png"))
+
+    print("\n✓ Hex mechanisms demo complete")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="MultiGrid Backend Demo")
+    parser.add_argument("--visual", action="store_true", help="Save PNG images")
+    parser.add_argument("--demo", type=int, help="Run specific demo (1-8)")
+    parser.add_argument("--play", action="store_true", help="Interactive play mode")
+    parser.add_argument("--tiling", type=str, default="square",
+                        choices=["square", "hex", "triangle"],
+                        help="Tiling type for play mode (default: square)")
+    args = parser.parse_args()
+
+    # Interactive play mode
+    if args.play:
+        interactive_play(args.tiling)
+        return
+
+    print("=" * 60)
+    print("MultiGrid Backend Demo")
+    print("=" * 60)
+    print("\nThis demo uses the custom MultiGrid implementation with")
+    print("support for square, hex, and triangle tilings.")
+
+    demos = [
+        ("Tiling Types", demo_tiling_types),
+        ("All Objects", demo_all_objects),
+        ("Key + Door", demo_key_door_mechanism),
+        ("Switch + Gate", demo_switch_gate_mechanism),
+        ("Hazard", demo_hazard),
+        ("Push Action", demo_push_action),
+        ("Triangle Navigation", demo_triangle_navigation),
+        ("Hex with Mechanisms", demo_hex_with_mechanisms),
+    ]
+
+    if args.demo:
+        if 1 <= args.demo <= len(demos):
+            name, fn = demos[args.demo - 1]
+            fn(save_images=args.visual)
+        else:
+            print(f"Invalid demo number. Choose 1-{len(demos)}")
+            print("\nAvailable demos:")
+            for i, (name, _) in enumerate(demos, 1):
+                print(f"  {i}. {name}")
+    else:
+        for name, fn in demos:
+            fn(save_images=args.visual)
+
+    print("\n" + "=" * 60)
+    print("MultiGrid Demo Complete!")
+    print("=" * 60)
+
+    if args.visual:
+        output_dir = Path(__file__).parent / "demo_output"
+        print(f"\nImages saved to: {output_dir}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/v1_1/multigrid/demo_output/demo1_hex.png b/src/v1_1/multigrid/demo_output/demo1_hex.png
new file mode 100644
index 00000000..ac8384a4
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo1_hex.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo1_square.png b/src/v1_1/multigrid/demo_output/demo1_square.png
new file mode 100644
index 00000000..ab49aca9
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo1_square.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo1_triangle.png b/src/v1_1/multigrid/demo_output/demo1_triangle.png
new file mode 100644
index 00000000..abe8108e
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo1_triangle.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo2_all_objects.png b/src/v1_1/multigrid/demo_output/demo2_all_objects.png
new file mode 100644
index 00000000..9e34e796
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo2_all_objects.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo3_key_door.png b/src/v1_1/multigrid/demo_output/demo3_key_door.png
new file mode 100644
index 00000000..37908ad0
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo3_key_door.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo4_switch_gate.png b/src/v1_1/multigrid/demo_output/demo4_switch_gate.png
new file mode 100644
index 00000000..7a5f6636
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo4_switch_gate.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo5_hazard.png b/src/v1_1/multigrid/demo_output/demo5_hazard.png
new file mode 100644
index 00000000..9c3a3593
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo5_hazard.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo6_push.png b/src/v1_1/multigrid/demo_output/demo6_push.png
new file mode 100644
index 00000000..c6df5312
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo6_push.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo7_triangle.png b/src/v1_1/multigrid/demo_output/demo7_triangle.png
new file mode 100644
index 00000000..6849fa2c
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo7_triangle.png differ
diff --git a/src/v1_1/multigrid/demo_output/demo8_hex_mechanisms.png b/src/v1_1/multigrid/demo_output/demo8_hex_mechanisms.png
new file mode 100644
index 00000000..86072eea
Binary files /dev/null and b/src/v1_1/multigrid/demo_output/demo8_hex_mechanisms.png differ
diff --git a/src/v1_1/multigrid/env.py b/src/v1_1/multigrid/env.py
new file mode 100644
index 00000000..bd46462a
--- /dev/null
+++ b/src/v1_1/multigrid/env.py
@@ -0,0 +1,273 @@
+# multigrid/env.py
+
+import json
+import numpy as np
+from typing import Optional, Union
+import gymnasium as gym
+from gymnasium import spaces
+from .agent import Action
+from .world import WorldState, execute_action
+from .base import Tiling
+from .tilings import SquareTiling, HexTiling, TriangleTiling, Archimedean3464Tiling, Archimedean488Tiling
+from .rendering import render_multigrid
+
+
+class TilingRegistry:
+    """Registry for tiling types."""
+    _types = {
+        "square": SquareTiling,
+        "hex": HexTiling,
+        "triangle": TriangleTiling,
+        "3464": Archimedean3464Tiling,
+        "488": Archimedean488Tiling,
+    }
+
+    @classmethod
+    def get(cls, name: str) -> Tiling:
+        """Get tiling instance by name."""
+        if name not in cls._types:
+            raise ValueError(f"Unknown tiling type: {name}")
+        return cls._types[name]()
+
+
+class MultiGridEnv(gym.Env):
+    """
+    MultiGrid environment with arbitrary tiling support.
+
+    Fully compatible with gymnasium.Env for RL library compatibility.
+    """
+
+    metadata = {
+        "render_modes": ["human", "rgb_array", "state_dict"],
+        "render_fps": 10,
+    }
+
+    def __init__(
+        self,
+        task_spec: Union[dict, str],           # Task spec dict or path to JSON
+        tiling: Union[str, Tiling] = "square", # Tiling type or instance
+        render_mode: Optional[str] = None,
+        render_style: str = "minimal",         # "minimal" or "sprite"
+        partial_obs: bool = False,             # Partial observability
+        obs_radius: int = 3,                   # Vision radius if partial_obs
+        observability_mode: str = "full",      # "full", "view_cone", "fog_of_war"
+    ):
+        super().__init__()
+
+        # Load task spec
+        if isinstance(task_spec, str):
+            with open(task_spec) as f:
+                task_spec = json.load(f)
+        self.task_spec = task_spec
+
+        # Initialize tiling
+        if isinstance(tiling, str):
+            self.tiling = TilingRegistry.get(tiling)
+        else:
+            self.tiling = tiling
+
+        self.render_mode = render_mode
+        self.render_style = render_style
+        self.partial_obs = partial_obs
+        self.obs_radius = obs_radius
+        self.observability_mode = observability_mode
+
+        # If partial_obs is True but mode is still "full", default to "view_cone"
+        if self.partial_obs and self.observability_mode == "full":
+            self.observability_mode = "view_cone"
+
+        # Define Gymnasium action space
+        self.action_space = spaces.Discrete(len(Action))
+
+        # Define Gymnasium observation space (RGB image)
+        # Simplified: 64x64 RGB for now
+        self.observation_space = spaces.Box(
+            low=0, high=255,
+            shape=(64, 64, 3),
+            dtype=np.uint8
+        )
+
+        # State tracking
+        self.state: Optional[WorldState] = None
+        self.steps: int = 0
+        self.renderer = None
+
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None
+    ) -> tuple[np.ndarray, dict]:
+        """Reset environment to initial state."""
+        # Use task spec seed if not overridden
+        actual_seed = seed if seed is not None else self.task_spec.get("seed", 0)
+
+        # Generate world from task spec
+        self.state = WorldState.from_task_spec(
+            self.task_spec,
+            self.tiling,
+            seed=actual_seed
+        )
+        self.steps = 0
+
+        # Configure partial observability on the state
+        self.state.observability_mode = self.observability_mode
+        self.state.view_radius = self.obs_radius
+        self.state.update_visibility()
+
+        obs = self._get_obs()
+        info = self._get_info()
+
+        return obs, info
+
+    def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, dict]:
+        """Execute action and return (obs, reward, terminated, truncated, info)."""
+        assert self.state is not None, "Call reset() before step()"
+
+        # Execute action
+        self.state, done, action_info = execute_action(
+            self.state,
+            Action(action),
+            self.tiling
+        )
+        self.steps += 1
+
+        # Update visibility after movement
+        self.state.update_visibility()
+
+        # Compute reward
+        reward = self._compute_reward(done, action_info)
+
+        # Check termination conditions
+        terminated = done  # Goal achieved
+        truncated = self.steps >= self.task_spec["limits"]["max_steps"]
+
+        obs = self._get_obs()
+        info = self._get_info()
+        info.update(action_info)
+
+        return obs, reward, terminated, truncated, info
+
+    def render(self) -> Optional[np.ndarray]:
+        """Render the environment."""
+        if self.render_mode == "rgb_array":
+            return self._render_frame()
+        elif self.render_mode == "human":
+            self._render_human()
+            return None
+        elif self.render_mode == "state_dict":
+            return self.get_state_dict()
+
+    def get_state_dict(self) -> dict:
+        """Export full state as structured dict for cross-domain verification."""
+        return {
+            "agent": {
+                "cell_id": self.state.agent.cell_id,
+                "facing": self.state.agent.facing,
+                "facing_direction": self.state.agent.get_facing_direction(self.tiling),
+                "holding": self.state.agent.holding.id if self.state.agent.holding else None,
+                "position_canonical": self.tiling.cell_to_canonical(self.state.agent.cell_id)
+            },
+            "objects": {
+                obj.id: {
+                    "type": obj.obj_type,
+                    "cell_id": obj.cell_id,
+                    "position_canonical": self.tiling.cell_to_canonical(obj.cell_id) if obj.cell_id else None,
+                    "color": obj.color
+                }
+                for obj in self.state.objects.values()
+            },
+            "step": self.steps,
+            "goal_achieved": self.state.check_goal()
+        }
+
+    def _get_obs(self) -> np.ndarray:
+        """Get observation based on observability mode."""
+        if self.state is None:
+            return np.zeros((64, 64, 3), dtype=np.uint8)
+
+        # Get goal cell ID for rendering if goal is position-based
+        goal_cell_id = None
+        if self.state.goal is not None:
+            # Check if goal has a target_cell_id (ReachPositionGoal or ReachCanonicalPositionGoal)
+            if hasattr(self.state.goal, 'target_cell_id'):
+                goal_cell_id = self.state.goal.target_cell_id
+
+        # Pass visibility info to renderer for partial observability
+        visible = self.state.visible_cells if self.state.observability_mode != "full" else None
+        explored = self.state.explored_cells if self.state.observability_mode != "full" else None
+
+        # Render observation at 64x64 for VLM input
+        return render_multigrid(
+            self.state,
+            self.tiling,
+            width=64,
+            height=64,
+            goal_cell_id=goal_cell_id,
+            visible_cells=visible,
+            explored_cells=explored,
+        )
+
+    def _get_info(self) -> dict:
+        """Get info dict."""
+        info = {
+            "step": self.steps,
+            "agent_cell": self.state.agent.cell_id,
+        }
+        if self.state.observability_mode != "full":
+            info["visible_cells"] = len(self.state.visible_cells)
+            info["explored_cells"] = len(self.state.explored_cells)
+            info["total_cells"] = len(self.tiling.cells)
+        return info
+
+    def _compute_reward(self, done: bool, action_info: dict) -> float:
+        """Compute reward signal."""
+        if done:
+            return 1.0  # Goal achieved
+        elif action_info.get("invalid_action"):
+            return -0.01  # Small penalty for invalid actions
+        else:
+            return 0.0  # Neutral
+
+    def _render_frame(self) -> np.ndarray:
+        """Render frame to RGB array."""
+        if self.state is None:
+            return np.zeros((640, 640, 3), dtype=np.uint8)
+
+        # Get goal cell ID for rendering if goal is position-based
+        goal_cell_id = None
+        if self.state.goal is not None:
+            if hasattr(self.state.goal, 'target_cell_id'):
+                goal_cell_id = self.state.goal.target_cell_id
+
+        # Pass visibility info to renderer for partial observability
+        visible = self.state.visible_cells if self.state.observability_mode != "full" else None
+        explored = self.state.explored_cells if self.state.observability_mode != "full" else None
+
+        # Render at higher resolution for human viewing
+        return render_multigrid(
+            self.state,
+            self.tiling,
+            width=640,
+            height=640,
+            goal_cell_id=goal_cell_id,
+            visible_cells=visible,
+            explored_cells=explored,
+        )
+
+    def _render_human(self):
+        """Render for human viewing."""
+        if self.state is None:
+            print("No state to render")
+            return
+
+        # Print state info
+        print(f"Step {self.steps}, Agent at {self.state.agent.cell_id}, Facing: {self.state.agent.facing}")
+
+        # Try to display image if PIL is available
+        try:
+            from PIL import Image
+            frame = self._render_frame()
+            img = Image.fromarray(frame)
+            img.show()
+        except ImportError:
+            print("PIL not available for image display")
diff --git a/src/v1_1/multigrid/goals.py b/src/v1_1/multigrid/goals.py
new file mode 100644
index 00000000..983230c7
--- /dev/null
+++ b/src/v1_1/multigrid/goals.py
@@ -0,0 +1,302 @@
+# multigrid/goals.py
+
+"""
+Goal System for MultiGrid Environments
+
+Provides goal predicates that can be checked against world state to determine
+if an episode has been successfully completed.
+
+Supported goal types:
+- reach_position: Agent must reach a specific cell
+- collect_all: Agent must collect all specified objects
+- push_block_to: Agent must push block(s) to target position(s)
+- survive_steps: Agent must survive for N steps (always returns False until truncation)
+"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Optional, Union
+
+if TYPE_CHECKING:
+    from .world import WorldState
+    from .base import Tiling
+
+
+class Goal(ABC):
+    """Abstract base class for goal predicates."""
+
+    @abstractmethod
+    def check(self, state: "WorldState") -> bool:
+        """
+        Check if the goal condition is satisfied.
+
+        Args:
+            state: Current world state
+
+        Returns:
+            True if goal is achieved, False otherwise
+        """
+        pass
+
+    @abstractmethod
+    def get_description(self) -> str:
+        """Get human-readable description of the goal."""
+        pass
+
+
+class ReachPositionGoal(Goal):
+    """Goal: Agent must reach a specific cell."""
+
+    def __init__(self, target_cell_id: str):
+        """
+        Args:
+            target_cell_id: The cell ID the agent must reach
+        """
+        self.target_cell_id = target_cell_id
+
+    def check(self, state: "WorldState") -> bool:
+        return state.agent.cell_id == self.target_cell_id
+
+    def get_description(self) -> str:
+        return f"Reach position {self.target_cell_id}"
+
+
+class ReachCanonicalPositionGoal(Goal):
+    """Goal: Agent must reach a cell at canonical coordinates (uses nearest cell)."""
+
+    def __init__(self, x: float, y: float, tiling: "Tiling"):
+        """
+        Args:
+            x: Target x coordinate (normalized 0-1)
+            y: Target y coordinate (normalized 0-1)
+            tiling: Tiling to convert coordinates to cell ID
+        """
+        self.x = x
+        self.y = y
+        self.tiling = tiling
+        self._target_cell_id: Optional[str] = None
+
+    @property
+    def target_cell_id(self) -> str:
+        if self._target_cell_id is None:
+            self._target_cell_id = self.tiling.canonical_to_cell(self.x, self.y)
+        return self._target_cell_id
+
+    def check(self, state: "WorldState") -> bool:
+        return state.agent.cell_id == self.target_cell_id
+
+    def get_description(self) -> str:
+        return f"Reach position ({self.x:.2f}, {self.y:.2f})"
+
+
+class CollectAllGoal(Goal):
+    """Goal: Agent must collect all specified objects."""
+
+    def __init__(self, object_ids: list[str]):
+        """
+        Args:
+            object_ids: List of object IDs that must be collected
+        """
+        self.object_ids = set(object_ids)
+        self.collected: set[str] = set()
+
+    def check(self, state: "WorldState") -> bool:
+        # Check which objects are no longer in the world (collected)
+        remaining_objects = set(state.objects.keys())
+        collected = self.object_ids - remaining_objects
+
+        # Also check if agent is holding any target objects
+        if state.agent.holding and state.agent.holding.id in self.object_ids:
+            collected.add(state.agent.holding.id)
+
+        return collected == self.object_ids
+
+    def get_description(self) -> str:
+        return f"Collect all items: {', '.join(self.object_ids)}"
+
+
+class PushBlockToGoal(Goal):
+    """Goal: Push specified block(s) to target position(s)."""
+
+    def __init__(self, block_targets: dict[str, str]):
+        """
+        Args:
+            block_targets: Mapping of block_id -> target_cell_id
+        """
+        self.block_targets = block_targets
+
+    def check(self, state: "WorldState") -> bool:
+        for block_id, target_cell in self.block_targets.items():
+            if block_id not in state.objects:
+                return False  # Block doesn't exist
+            if state.objects[block_id].cell_id != target_cell:
+                return False  # Block not at target
+        return True
+
+    def get_description(self) -> str:
+        targets = [f"{bid} to {cell}" for bid, cell in self.block_targets.items()]
+        return f"Push blocks: {', '.join(targets)}"
+
+
+class SurviveStepsGoal(Goal):
+    """Goal: Survive for N steps (never returns True from check, relies on truncation)."""
+
+    def __init__(self, steps: int):
+        """
+        Args:
+            steps: Number of steps to survive
+        """
+        self.steps = steps
+
+    def check(self, state: "WorldState") -> bool:
+        # This goal is achieved via truncation, not termination
+        return False
+
+    def get_description(self) -> str:
+        return f"Survive for {self.steps} steps"
+
+
+class ObjectInZoneGoal(Goal):
+    """Goal: A specified object must be inside a zone's covered_cells for N consecutive steps."""
+
+    def __init__(self, object_id: str, zone_id: str, consecutive_steps: int = 1):
+        self.object_id = object_id
+        self.zone_id = zone_id
+        self.consecutive_steps = consecutive_steps
+        self._steps_in_zone = 0
+
+    def check(self, state: "WorldState") -> bool:
+        obj = state.objects.get(self.object_id)
+        zone = state.objects.get(self.zone_id)
+        if obj and zone and obj.cell_id in zone.covered_cells:
+            self._steps_in_zone += 1
+        else:
+            self._steps_in_zone = 0
+        return self._steps_in_zone >= self.consecutive_steps
+
+    def get_description(self) -> str:
+        desc = f"Object {self.object_id} in zone {self.zone_id}"
+        if self.consecutive_steps > 1:
+            desc += f" for {self.consecutive_steps} consecutive steps"
+        return desc
+
+
+class CompositeGoal(Goal):
+    """Goal: All sub-goals must be achieved (AND logic)."""
+
+    def __init__(self, goals: list[Goal]):
+        """
+        Args:
+            goals: List of goals that must all be satisfied
+        """
+        self.goals = goals
+
+    def check(self, state: "WorldState") -> bool:
+        return all(goal.check(state) for goal in self.goals)
+
+    def get_description(self) -> str:
+        descs = [goal.get_description() for goal in self.goals]
+        return " AND ".join(descs)
+
+
+class AnyGoal(Goal):
+    """Goal: Any one sub-goal must be achieved (OR logic)."""
+
+    def __init__(self, goals: list[Goal]):
+        """
+        Args:
+            goals: List of goals where any one is sufficient
+        """
+        self.goals = goals
+
+    def check(self, state: "WorldState") -> bool:
+        return any(goal.check(state) for goal in self.goals)
+
+    def get_description(self) -> str:
+        descs = [goal.get_description() for goal in self.goals]
+        return " OR ".join(descs)
+
+
+def create_goal_from_spec(goal_spec: dict, tiling: "Tiling") -> Goal:
+    """
+    Create a Goal object from a goal specification dictionary.
+
+    Args:
+        goal_spec: Dictionary containing goal specification
+            - type: Goal type ("reach_position", "collect_all", "push_block_to", "survive_steps")
+            - target: Target position for reach_position (dict with x, y)
+            - target_ids: List of object IDs for collect_all
+            - block_targets: Dict of block_id -> target position for push_block_to
+            - auxiliary_conditions: Additional goals to AND together
+
+        tiling: Tiling instance for coordinate conversion
+
+    Returns:
+        Goal object
+    """
+    goal_type = goal_spec.get("type", "reach_position")
+    goals = []
+
+    if goal_type == "reach_position":
+        target = goal_spec.get("target")
+        if target:
+            if isinstance(target, dict):
+                # Canonical coordinates
+                goals.append(ReachCanonicalPositionGoal(target["x"], target["y"], tiling))
+            elif isinstance(target, str):
+                # Cell ID
+                goals.append(ReachPositionGoal(target))
+            elif isinstance(target, (list, tuple)) and len(target) == 2:
+                # [x, y] format - treat as canonical coordinates
+                goals.append(ReachCanonicalPositionGoal(float(target[0]), float(target[1]), tiling))
+
+    elif goal_type == "collect_all":
+        target_ids = goal_spec.get("target_ids", [])
+        if target_ids:
+            goals.append(CollectAllGoal(target_ids))
+
+    elif goal_type == "push_block_to":
+        # Build block_targets mapping
+        target_ids = goal_spec.get("target_ids", [])
+        target_positions = goal_spec.get("target_positions", [])
+
+        if target_ids and target_positions:
+            block_targets = {}
+            for block_id, target_pos in zip(target_ids, target_positions):
+                if isinstance(target_pos, dict):
+                    target_cell = tiling.canonical_to_cell(target_pos["x"], target_pos["y"])
+                elif isinstance(target_pos, (list, tuple)) and len(target_pos) == 2:
+                    target_cell = tiling.canonical_to_cell(float(target_pos[0]), float(target_pos[1]))
+                else:
+                    target_cell = str(target_pos)
+                block_targets[block_id] = target_cell
+            goals.append(PushBlockToGoal(block_targets))
+
+    elif goal_type == "object_in_zone":
+        goals.append(ObjectInZoneGoal(
+            goal_spec["object_id"],
+            goal_spec["zone_id"],
+            goal_spec.get("consecutive_steps", 1),
+        ))
+
+    elif goal_type == "survive_steps":
+        steps = goal_spec.get("steps", goal_spec.get("max_steps", 100))
+        goals.append(SurviveStepsGoal(steps))
+
+    # Handle auxiliary conditions
+    auxiliary = goal_spec.get("auxiliary_conditions", [])
+    for aux in auxiliary:
+        if isinstance(aux, dict):
+            aux_goal = create_goal_from_spec(aux, tiling)
+            goals.append(aux_goal)
+        elif isinstance(aux, str):
+            # Simple string conditions (could be expanded)
+            pass
+
+    if len(goals) == 0:
+        # Default: reach position (0.9, 0.9) - bottom-right
+        return ReachCanonicalPositionGoal(0.9, 0.9, tiling)
+    elif len(goals) == 1:
+        return goals[0]
+    else:
+        return CompositeGoal(goals)
diff --git a/src/v1_1/multigrid/objects/__init__.py b/src/v1_1/multigrid/objects/__init__.py
new file mode 100644
index 00000000..f1cf5dde
--- /dev/null
+++ b/src/v1_1/multigrid/objects/__init__.py
@@ -0,0 +1,6 @@
+# objects/__init__.py
+
+from .base import WorldObj, ObjectRegistry, PhysicsProperties
+from .builtin import MovableObj, Wall, Zone
+
+__all__ = ['WorldObj', 'ObjectRegistry', 'PhysicsProperties', 'MovableObj', 'Wall', 'Zone']
diff --git a/src/v1_1/multigrid/objects/base.py b/src/v1_1/multigrid/objects/base.py
new file mode 100644
index 00000000..d16075d7
--- /dev/null
+++ b/src/v1_1/multigrid/objects/base.py
@@ -0,0 +1,67 @@
+# objects/base.py
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class PhysicsProperties:
+    """Physics properties for objects (stubbed for future implementation)."""
+    mass: float = 1.0
+    friction: float = 0.5
+    restitution: float = 0.0  # Bounciness
+
+
+class WorldObj(ABC):
+    """Base class for all objects in the world."""
+
+    def __init__(self, id: str, color: str):
+        self.id = id
+        self.color = color
+        self.cell_id: Optional[str] = None  # Current location
+
+    @property
+    @abstractmethod
+    def obj_type(self) -> str:
+        """Object type identifier."""
+        pass
+
+    @abstractmethod
+    def can_overlap(self) -> bool:
+        """Whether agent/objects can occupy same cell."""
+        pass
+
+    @abstractmethod
+    def can_pickup(self) -> bool:
+        """Whether agent can pick this up."""
+        pass
+
+    @abstractmethod
+    def can_push(self) -> bool:
+        """Whether agent can push this."""
+        pass
+
+    def get_physics(self) -> PhysicsProperties:
+        """Get physics properties. Override in subclasses for custom behavior."""
+        return PhysicsProperties()
+
+
+class ObjectRegistry:
+    """Registry for object types."""
+    _types: dict[str, type[WorldObj]] = {}
+
+    @classmethod
+    def register(cls, obj_type: str):
+        """Decorator to register an object type."""
+        def decorator(obj_class: type[WorldObj]):
+            cls._types[obj_type] = obj_class
+            return obj_class
+        return decorator
+
+    @classmethod
+    def create(cls, obj_type: str, **kwargs) -> WorldObj:
+        """Factory method to create objects."""
+        if obj_type not in cls._types:
+            raise ValueError(f"Unknown object type: {obj_type}")
+        return cls._types[obj_type](**kwargs)
diff --git a/src/v1_1/multigrid/objects/builtin.py b/src/v1_1/multigrid/objects/builtin.py
new file mode 100644
index 00000000..300fbf1a
--- /dev/null
+++ b/src/v1_1/multigrid/objects/builtin.py
@@ -0,0 +1,367 @@
+# objects/builtin.py
+
+"""
+Built-in Object Types for MultiGrid
+
+Provides all standard object types for gridworld puzzles:
+- Movable: Pickable/pushable objects (boxes, balls)
+- Wall: Impassable barriers
+- Zone: Target areas (overlappable)
+- Key: Colored keys for unlocking doors
+- Door: Barriers that require matching key to unlock
+- Switch: Controls gates (toggle/hold/one-shot modes)
+- Gate: Barriers controlled by switches
+- Hazard: Dangerous cells that terminate episode
+- Teleporter: Linked pairs that transport agent
+"""
+
+from typing import Optional, Literal
+from .base import WorldObj, ObjectRegistry
+
+
+@ObjectRegistry.register("movable")
+class MovableObj(WorldObj):
+    """Movable object (can be picked up or pushed)."""
+
+    @property
+    def obj_type(self) -> str:
+        return "movable"
+
+    def can_overlap(self) -> bool:
+        return False
+
+    def can_pickup(self) -> bool:
+        return True
+
+    def can_push(self) -> bool:
+        return True
+
+
+@ObjectRegistry.register("wall")
+class Wall(WorldObj):
+    """Wall object (blocks movement)."""
+
+    @property
+    def obj_type(self) -> str:
+        return "wall"
+
+    def can_overlap(self) -> bool:
+        return False
+
+    def can_pickup(self) -> bool:
+        return False
+
+    def can_push(self) -> bool:
+        return False
+
+
+@ObjectRegistry.register("zone")
+class Zone(WorldObj):
+    """Target zone - agent and objects can occupy."""
+
+    def __init__(self, id: str, color: str, radius_hops: int = 1):
+        super().__init__(id, color)
+        self.radius_hops = radius_hops
+        self.covered_cells: set[str] = set()  # Computed from tiling
+
+    @property
+    def obj_type(self) -> str:
+        return "zone"
+
+    def can_overlap(self) -> bool:
+        return True
+
+    def can_pickup(self) -> bool:
+        return False
+
+    def can_push(self) -> bool:
+        return False
+
+
+@ObjectRegistry.register("key")
+class Key(WorldObj):
+    """
+    Key object for unlocking doors.
+
+    Keys can be picked up and used to unlock doors of matching color.
+    Depending on rules.key_consumption, keys may be consumed on use.
+    """
+
+    def __init__(self, id: str, color: str):
+        super().__init__(id, color)
+        self.used: bool = False  # Track if key has been used
+
+    @property
+    def obj_type(self) -> str:
+        return "key"
+
+    def can_overlap(self) -> bool:
+        return False
+
+    def can_pickup(self) -> bool:
+        return True
+
+    def can_push(self) -> bool:
+        return False
+
+
+@ObjectRegistry.register("door")
+class Door(WorldObj):
+    """
+    Door object that blocks movement until unlocked.
+
+    Doors require a key of matching color to unlock. Once unlocked,
+    the door becomes passable (can_overlap returns True).
+
+    Attributes:
+        is_locked: Whether the door is currently locked
+        is_open: Whether the door is open (unlocked and toggled open)
+    """
+
+    def __init__(self, id: str, color: str, is_locked: bool = True):
+        super().__init__(id, color)
+        self.is_locked = is_locked
+        self.is_open = not is_locked  # Unlocked doors start open
+
+    @property
+    def obj_type(self) -> str:
+        return "door"
+
+    def can_overlap(self) -> bool:
+        # Can pass through if unlocked and open
+        return self.is_open
+
+    def can_pickup(self) -> bool:
+        return False
+
+    def can_push(self) -> bool:
+        return False
+
+    def unlock(self) -> bool:
+        """Unlock the door. Returns True if successfully unlocked."""
+        if self.is_locked:
+            self.is_locked = False
+            self.is_open = True
+            return True
+        return False
+
+    def toggle(self) -> None:
+        """Toggle door open/closed (only works if unlocked)."""
+        if not self.is_locked:
+            self.is_open = not self.is_open
+
+
+@ObjectRegistry.register("switch")
+class Switch(WorldObj):
+    """
+    Switch that controls one or more gates.
+
+    Switch types:
+    - toggle: Each activation flips the state
+    - hold: Active only while agent is on the switch
+    - one_shot: Can only be activated once
+
+    Attributes:
+        switch_type: Type of switch behavior
+        is_active: Current switch state
+        controls: List of gate IDs this switch controls
+        used: Whether one_shot switch has been used
+    """
+
+    def __init__(
+        self,
+        id: str,
+        color: str,
+        switch_type: Literal["toggle", "hold", "one_shot"] = "toggle",
+        controls: Optional[list[str]] = None,
+        initial_state: bool = False
+    ):
+        super().__init__(id, color)
+        self.switch_type = switch_type
+        self.is_active = initial_state
+        self.controls = controls or []
+        self.used = False  # For one_shot switches
+
+    @property
+    def obj_type(self) -> str:
+        return "switch"
+
+    def can_overlap(self) -> bool:
+        # Agent can stand on switches
+        return True
+
+    def can_pickup(self) -> bool:
+        return False
+
+    def can_push(self) -> bool:
+        return False
+
+    def activate(self) -> bool:
+        """
+        Activate the switch.
+
+        Returns True if state changed.
+        """
+        if self.switch_type == "one_shot":
+            if self.used:
+                return False
+            self.used = True
+            self.is_active = True
+            return True
+        elif self.switch_type == "toggle":
+            self.is_active = not self.is_active
+            return True
+        elif self.switch_type == "hold":
+            if not self.is_active:
+                self.is_active = True
+                return True
+            return False
+        return False
+
+    def deactivate(self) -> bool:
+        """
+        Deactivate the switch (for hold type when agent leaves).
+
+        Returns True if state changed.
+        """
+        if self.switch_type == "hold" and self.is_active:
+            self.is_active = False
+            return True
+        return False
+
+
+@ObjectRegistry.register("gate")
+class Gate(WorldObj):
+    """
+    Gate that opens/closes based on switch state.
+
+    Gates are controlled by switches. When the controlling switch(es)
+    are active, the gate opens (becomes passable).
+
+    Attributes:
+        is_open: Whether the gate is currently open
+        controlled_by: List of switch IDs that control this gate
+        require_all: If True, all switches must be active; if False, any one
+    """
+
+    def __init__(
+        self,
+        id: str,
+        color: str,
+        is_open: bool = False,
+        controlled_by: Optional[list[str]] = None,
+        require_all: bool = False
+    ):
+        super().__init__(id, color)
+        self.is_open = is_open
+        self.controlled_by = controlled_by or []
+        self.require_all = require_all
+
+    @property
+    def obj_type(self) -> str:
+        return "gate"
+
+    def can_overlap(self) -> bool:
+        return self.is_open
+
+    def can_pickup(self) -> bool:
+        return False
+
+    def can_push(self) -> bool:
+        return False
+
+    def set_open(self, is_open: bool) -> None:
+        """Set gate open/closed state."""
+        self.is_open = is_open
+
+
+@ObjectRegistry.register("hazard")
+class Hazard(WorldObj):
+    """
+    Hazardous cell that terminates the episode.
+
+    When the agent steps on a hazard, the episode ends with failure.
+    Common examples: lava, spikes, pits.
+
+    Attributes:
+        hazard_type: Type of hazard (for rendering)
+        damage: Damage dealt (for future health system)
+    """
+
+    def __init__(
+        self,
+        id: str,
+        color: str = "red",
+        hazard_type: str = "lava",
+        damage: float = 1.0
+    ):
+        super().__init__(id, color)
+        self.hazard_type = hazard_type
+        self.damage = damage
+
+    @property
+    def obj_type(self) -> str:
+        return "hazard"
+
+    def can_overlap(self) -> bool:
+        # Agent can step on hazards (but will be damaged/killed)
+        return True
+
+    def can_pickup(self) -> bool:
+        return False
+
+    def can_push(self) -> bool:
+        return False
+
+
+@ObjectRegistry.register("teleporter")
+class Teleporter(WorldObj):
+    """
+    Teleporter that transports agent to linked destination.
+
+    Teleporters come in pairs. When agent steps on one, they are
+    transported to the linked teleporter.
+
+    Attributes:
+        linked_to: ID of the destination teleporter
+        cooldown: Steps before teleporter can be used again
+        current_cooldown: Current cooldown counter
+    """
+
+    def __init__(
+        self,
+        id: str,
+        color: str = "purple",
+        linked_to: Optional[str] = None,
+        cooldown: int = 1
+    ):
+        super().__init__(id, color)
+        self.linked_to = linked_to
+        self.cooldown = cooldown
+        self.current_cooldown = 0
+
+    @property
+    def obj_type(self) -> str:
+        return "teleporter"
+
+    def can_overlap(self) -> bool:
+        return True
+
+    def can_pickup(self) -> bool:
+        return False
+
+    def can_push(self) -> bool:
+        return False
+
+    def can_teleport(self) -> bool:
+        """Check if teleporter is ready to use."""
+        return self.current_cooldown == 0 and self.linked_to is not None
+
+    def use(self) -> None:
+        """Use the teleporter, starting cooldown."""
+        self.current_cooldown = self.cooldown
+
+    def tick(self) -> None:
+        """Reduce cooldown by one step."""
+        if self.current_cooldown > 0:
+            self.current_cooldown -= 1
diff --git a/src/v1_1/multigrid/rendering.py b/src/v1_1/multigrid/rendering.py
new file mode 100644
index 00000000..3c0cdf46
--- /dev/null
+++ b/src/v1_1/multigrid/rendering.py
@@ -0,0 +1,614 @@
+# multigrid/rendering.py
+
+"""
+Rendering System for MultiGrid Environments
+
+Provides vector-based rendering for all tiling types (square, hex, triangle).
+Uses PIL for high-quality polygon drawing suitable for VLM evaluation.
+"""
+
+import math
+import numpy as np
+from abc import ABC, abstractmethod
+from typing import Optional, List, Tuple
+from PIL import Image, ImageDraw
+
+from .objects.base import WorldObj
+from .core import Cell
+
+
+# Color palette for rendering
+COLORS = {
+    "background": (245, 245, 245),  # Light gray
+    "grid_line": (200, 200, 200),   # Gray
+    "wall": (64, 64, 64),           # Dark gray
+    "agent": (0, 100, 200),         # Blue
+    "goal": (0, 200, 0),            # Green
+    "red": (255, 60, 60),
+    "green": (60, 200, 60),
+    "blue": (60, 60, 255),
+    "yellow": (255, 255, 60),
+    "purple": (160, 60, 200),
+    "orange": (255, 165, 60),
+    "white": (255, 255, 255),
+    "black": (0, 0, 0),
+    "grey": (128, 128, 128),
+    "gray": (128, 128, 128),
+    "cyan": (60, 200, 200),
+}
+
+
+class Renderer(ABC):
+    """Abstract renderer supporting multiple visual styles."""
+
+    @abstractmethod
+    def begin_frame(self, width: int, height: int) -> None:
+        """Start a new frame."""
+        pass
+
+    @abstractmethod
+    def draw_cell_background(
+        self,
+        vertices: List[Tuple[float, float]],
+        color: Tuple[int, int, int],
+        outline: Optional[Tuple[int, int, int]] = None
+    ) -> None:
+        """Draw cell polygon background."""
+        pass
+
+    @abstractmethod
+    def draw_object(
+        self,
+        center: Tuple[float, float],
+        obj: WorldObj,
+        size: float
+    ) -> None:
+        """Draw an object at given position."""
+        pass
+
+    @abstractmethod
+    def draw_agent(
+        self,
+        center: Tuple[float, float],
+        facing: float,  # Angle in radians
+        size: float,
+        holding: Optional[WorldObj] = None
+    ) -> None:
+        """Draw the agent."""
+        pass
+
+    @abstractmethod
+    def draw_goal(
+        self,
+        center: Tuple[float, float],
+        size: float
+    ) -> None:
+        """Draw the goal marker."""
+        pass
+
+    @abstractmethod
+    def end_frame(self) -> np.ndarray:
+        """Finish frame and return RGB array."""
+        pass
+
+
+class MinimalRenderer(Renderer):
+    """Clean vector-based rendering for VLM evaluation using PIL."""
+
+    def __init__(self):
+        self.img: Optional[Image.Image] = None
+        self.draw: Optional[ImageDraw.ImageDraw] = None
+        self.width = 0
+        self.height = 0
+
+    def begin_frame(self, width: int, height: int) -> None:
+        """Start a new frame."""
+        self.width = width
+        self.height = height
+        self.img = Image.new('RGB', (width, height), COLORS["background"])
+        self.draw = ImageDraw.Draw(self.img)
+
+    def draw_cell_background(
+        self,
+        vertices: List[Tuple[float, float]],
+        color: Tuple[int, int, int],
+        outline: Optional[Tuple[int, int, int]] = None
+    ) -> None:
+        """Draw cell polygon background."""
+        if self.draw is None:
+            return
+
+        # Convert to pixel coordinates
+        pixel_vertices = [(int(x), int(y)) for x, y in vertices]
+
+        if outline is None:
+            outline = COLORS["grid_line"]
+
+        self.draw.polygon(pixel_vertices, fill=color, outline=outline)
+
+    def draw_object(
+        self,
+        center: Tuple[float, float],
+        obj: WorldObj,
+        size: float
+    ) -> None:
+        """Draw an object at given position."""
+        if self.draw is None:
+            return
+
+        x, y = int(center[0]), int(center[1])
+        color = self._color_name_to_rgb(obj.color)
+        r = int(size * 0.4)
+
+        obj_type = obj.obj_type
+
+        if obj_type == "wall":
+            # Draw wall as filled square
+            self.draw.rectangle(
+                [x - r, y - r, x + r, y + r],
+                fill=COLORS["wall"],
+                outline=COLORS["black"]
+            )
+
+        elif obj_type == "movable":
+            # Draw movable as circle
+            self.draw.ellipse(
+                [x - r, y - r, x + r, y + r],
+                fill=color,
+                outline=COLORS["black"]
+            )
+
+        elif obj_type == "zone":
+            # Draw zone as semi-transparent circle (just outline)
+            self.draw.ellipse(
+                [x - r, y - r, x + r, y + r],
+                fill=None,
+                outline=color,
+                width=2
+            )
+
+        elif obj_type == "key":
+            # Draw key as a small circle with a stem (simplified key shape)
+            key_head_r = int(r * 0.5)
+            stem_width = int(r * 0.2)
+            # Key head (circle)
+            self.draw.ellipse(
+                [x - key_head_r, y - r, x + key_head_r, y - r + key_head_r * 2],
+                fill=color,
+                outline=COLORS["black"]
+            )
+            # Key stem (rectangle)
+            self.draw.rectangle(
+                [x - stem_width, y, x + stem_width, y + r],
+                fill=color,
+                outline=COLORS["black"]
+            )
+            # Key teeth
+            tooth_y = y + int(r * 0.5)
+            self.draw.rectangle(
+                [x, tooth_y, x + int(r * 0.3), tooth_y + int(r * 0.2)],
+                fill=color
+            )
+
+        elif obj_type == "door":
+            # Draw door as vertical rectangle with handle
+            door_width = int(r * 0.6)
+            # Check if door is open/locked
+            is_open = getattr(obj, 'is_open', False)
+            is_locked = getattr(obj, 'is_locked', True)
+
+            if is_open:
+                # Open door - just an outline
+                self.draw.rectangle(
+                    [x - door_width, y - r, x + door_width, y + r],
+                    fill=None,
+                    outline=color,
+                    width=2
+                )
+            else:
+                # Closed door - filled
+                self.draw.rectangle(
+                    [x - door_width, y - r, x + door_width, y + r],
+                    fill=color,
+                    outline=COLORS["black"]
+                )
+                # Draw lock indicator if locked
+                if is_locked:
+                    lock_r = int(r * 0.2)
+                    self.draw.ellipse(
+                        [x - lock_r, y - lock_r, x + lock_r, y + lock_r],
+                        fill=COLORS["black"]
+                    )
+
+        elif obj_type == "switch":
+            # Draw switch as a small square with indicator
+            switch_r = int(r * 0.5)
+            is_active = getattr(obj, 'is_active', False)
+
+            # Base
+            self.draw.rectangle(
+                [x - switch_r, y - switch_r, x + switch_r, y + switch_r],
+                fill=COLORS["grey"],
+                outline=COLORS["black"]
+            )
+            # Indicator (lit if active)
+            indicator_r = int(r * 0.25)
+            indicator_color = color if is_active else COLORS["black"]
+            self.draw.ellipse(
+                [x - indicator_r, y - indicator_r, x + indicator_r, y + indicator_r],
+                fill=indicator_color
+            )
+
+        elif obj_type == "gate":
+            # Draw gate as vertical bars
+            is_open = getattr(obj, 'is_open', False)
+            bar_width = int(r * 0.15)
+            num_bars = 3
+
+            if is_open:
+                # Open gate - bars to the side
+                for i in range(num_bars):
+                    bar_x = x + r + i * bar_width * 2
+                    self.draw.rectangle(
+                        [bar_x, y - r, bar_x + bar_width, y + r],
+                        fill=color,
+                        outline=COLORS["black"]
+                    )
+            else:
+                # Closed gate - bars blocking
+                spacing = (r * 2) // (num_bars + 1)
+                for i in range(num_bars):
+                    bar_x = x - r + spacing * (i + 1)
+                    self.draw.rectangle(
+                        [bar_x - bar_width, y - r, bar_x + bar_width, y + r],
+                        fill=color,
+                        outline=COLORS["black"]
+                    )
+
+        elif obj_type == "hazard":
+            # Draw hazard as warning triangle or lava pool
+            hazard_type = getattr(obj, 'hazard_type', 'lava')
+            if hazard_type == "lava":
+                # Lava - wavy orange/red
+                self.draw.ellipse(
+                    [x - r, y - int(r * 0.5), x + r, y + int(r * 0.5)],
+                    fill=COLORS["orange"],
+                    outline=COLORS["red"]
+                )
+            else:
+                # Generic hazard - warning triangle
+                triangle = [
+                    (x, y - r),
+                    (x + r, y + r),
+                    (x - r, y + r)
+                ]
+                self.draw.polygon(triangle, fill=COLORS["red"], outline=COLORS["black"])
+                # Exclamation mark
+                self.draw.rectangle(
+                    [x - 2, y - int(r * 0.3), x + 2, y + int(r * 0.2)],
+                    fill=COLORS["black"]
+                )
+                self.draw.ellipse(
+                    [x - 2, y + int(r * 0.4), x + 2, y + int(r * 0.6)],
+                    fill=COLORS["black"]
+                )
+
+        elif obj_type == "teleporter":
+            # Draw teleporter as concentric circles (portal)
+            for i in range(3, 0, -1):
+                ring_r = int(r * i / 3)
+                ring_color = color if i % 2 == 1 else COLORS["white"]
+                self.draw.ellipse(
+                    [x - ring_r, y - ring_r, x + ring_r, y + ring_r],
+                    fill=ring_color,
+                    outline=COLORS["black"] if i == 3 else None
+                )
+
+        else:
+            # Default: draw as diamond
+            diamond = [
+                (x, y - r),
+                (x + r, y),
+                (x, y + r),
+                (x - r, y)
+            ]
+            self.draw.polygon(diamond, fill=color, outline=COLORS["black"])
+
+    def draw_agent(
+        self,
+        center: Tuple[float, float],
+        facing: float,  # Angle in radians
+        size: float,
+        holding: Optional[WorldObj] = None
+    ) -> None:
+        """Draw the agent as a triangle pointing in facing direction."""
+        if self.draw is None:
+            return
+
+        x, y = center[0], center[1]
+        r = size * 0.5
+
+        # Triangle vertices relative to center, pointing in facing direction
+        # Tip at front, base at back
+        tip_angle = facing
+        base_angle_1 = facing + math.pi * 2 / 3
+        base_angle_2 = facing - math.pi * 2 / 3
+
+        tip = (x + r * math.cos(tip_angle), y + r * math.sin(tip_angle))
+        base1 = (x + r * 0.6 * math.cos(base_angle_1), y + r * 0.6 * math.sin(base_angle_1))
+        base2 = (x + r * 0.6 * math.cos(base_angle_2), y + r * 0.6 * math.sin(base_angle_2))
+
+        triangle = [
+            (int(tip[0]), int(tip[1])),
+            (int(base1[0]), int(base1[1])),
+            (int(base2[0]), int(base2[1]))
+        ]
+
+        self.draw.polygon(triangle, fill=COLORS["agent"], outline=COLORS["black"])
+
+        # If holding something, draw a small indicator
+        if holding is not None:
+            carry_r = int(r * 0.25)
+            carry_x = int(x)
+            carry_y = int(y)
+            carry_color = self._color_name_to_rgb(holding.color)
+            self.draw.ellipse(
+                [carry_x - carry_r, carry_y - carry_r, carry_x + carry_r, carry_y + carry_r],
+                fill=carry_color,
+                outline=COLORS["white"]
+            )
+
+    def draw_goal(
+        self,
+        center: Tuple[float, float],
+        size: float
+    ) -> None:
+        """Draw the goal marker as a star."""
+        if self.draw is None:
+            return
+
+        x, y = int(center[0]), int(center[1])
+        r = int(size * 0.4)
+
+        # Draw as filled green square with border
+        self.draw.rectangle(
+            [x - r, y - r, x + r, y + r],
+            fill=COLORS["goal"],
+            outline=COLORS["black"]
+        )
+
+    def end_frame(self) -> np.ndarray:
+        """Finish frame and return RGB array."""
+        if self.img is None:
+            return np.zeros((64, 64, 3), dtype=np.uint8)
+        return np.array(self.img)
+
+    def _color_name_to_rgb(self, color_name: str) -> Tuple[int, int, int]:
+        """Convert color name to RGB tuple."""
+        return COLORS.get(color_name.lower(), COLORS["grey"])
+
+
+def get_square_vertices(
+    center: Tuple[float, float],
+    size: float
+) -> List[Tuple[float, float]]:
+    """Get vertices for a square cell."""
+    x, y = center
+    half = size / 2
+    return [
+        (x - half, y - half),
+        (x + half, y - half),
+        (x + half, y + half),
+        (x - half, y + half)
+    ]
+
+
+def get_hex_vertices(
+    center: Tuple[float, float],
+    size: float
+) -> List[Tuple[float, float]]:
+    """Get vertices for a pointy-top hexagon."""
+    x, y = center
+    vertices = []
+    for i in range(6):
+        angle = math.pi / 2 - i * math.pi / 3  # Start from top, go clockwise
+        vx = x + size * math.cos(angle)
+        vy = y - size * math.sin(angle)  # Flip y
+        vertices.append((vx, vy))
+    return vertices
+
+
+def get_triangle_vertices(
+    hex_center: Tuple[float, float],
+    hex_size: float,
+    triangle_index: int
+) -> List[Tuple[float, float]]:
+    """Get vertices for a triangle within a hexagon."""
+    cx, cy = hex_center
+
+    # Vertices of the hexagon
+    hex_vertices = []
+    for i in range(6):
+        angle = math.pi / 2 - i * math.pi / 3
+        vx = cx + hex_size * math.cos(angle)
+        vy = cy - hex_size * math.sin(angle)
+        hex_vertices.append((vx, vy))
+
+    # Triangle i uses: center, vertex i, vertex (i+1)%6
+    return [
+        (cx, cy),
+        hex_vertices[triangle_index],
+        hex_vertices[(triangle_index + 1) % 6]
+    ]
+
+
+def _dim_color(color: Tuple[int, int, int], factor: float = 0.4) -> Tuple[int, int, int]:
+    """Dim a color by blending it toward dark gray."""
+    return tuple(int(c * factor) for c in color)
+
+
+def render_multigrid(
+    state,  # WorldState
+    tiling,  # Tiling
+    width: int = 640,
+    height: int = 640,
+    goal_cell_id: Optional[str] = None,
+    visible_cells: Optional[set] = None,
+    explored_cells: Optional[set] = None,
+) -> np.ndarray:
+    """
+    Render a MultiGrid world state to an RGB image.
+
+    Args:
+        state: WorldState object
+        tiling: Tiling object
+        width: Output image width
+        height: Output image height
+        goal_cell_id: Optional cell ID to mark as goal
+        visible_cells: Set of currently visible cell IDs (None = all visible)
+        explored_cells: Set of previously explored cell IDs (None = all explored)
+
+    Returns:
+        RGB numpy array of shape (height, width, 3)
+    """
+    renderer = MinimalRenderer()
+    renderer.begin_frame(width, height)
+
+    # Calculate cell size based on tiling type and canvas size
+    tiling_name = tiling.name
+    margin = 0.05
+    usable_width = width * (1 - 2 * margin)
+    usable_height = height * (1 - 2 * margin)
+    offset_x = width * margin
+    offset_y = height * margin
+
+    # Draw all cells
+    for cell_id, cell in tiling.cells.items():
+        # Get canonical position and convert to pixel coordinates
+        pos = cell.position_hint
+        px = offset_x + pos[0] * usable_width
+        py = offset_y + pos[1] * usable_height
+
+        # Calculate cell size
+        if tiling_name == "square":
+            num_cells = max(tiling.width, tiling.height)
+            cell_size = min(usable_width, usable_height) / num_cells * 0.9
+            vertices = get_square_vertices((px, py), cell_size)
+        elif tiling_name == "hex":
+            hex_size = min(usable_width, usable_height) / (tiling.height * 2) * 0.9
+            vertices = get_hex_vertices((px, py), hex_size)
+        elif tiling_name == "triangle":
+            # Use stored tiling_coords for accurate rendering
+            tc = cell.tiling_coords
+            if tc is not None:
+                hc = tc["hex_center"]
+                tri_idx = tc["tri_idx"]
+                hex_size_norm = tc["hex_size"]
+                # Convert hex center from normalized to pixel coords
+                hc_px = offset_x + hc[0] * usable_width
+                hc_py = offset_y + hc[1] * usable_height
+                # Scale hex size from normalized to pixel space
+                hex_size_px = hex_size_norm * min(usable_width, usable_height)
+            else:
+                # Fallback for cells without tiling_coords
+                hc_px, hc_py = px, py
+                hex_size_px = min(usable_width, usable_height) / (tiling.height * 2) * 0.9
+                _, _, _, tri_idx_str = cell_id.split("_")
+                tri_idx = int(tri_idx_str)
+            vertices = get_triangle_vertices((hc_px, hc_py), hex_size_px, tri_idx)
+        elif tiling_name in ("3464", "488"):
+            # Archimedean tilings: read pre-computed vertices from tiling_coords
+            tc = cell.tiling_coords
+            if tc is not None and "vertices" in tc:
+                # Vertices are in normalized [0,1] space; scale to pixel space
+                vertices = [
+                    (offset_x + vx * usable_width, offset_y + vy * usable_height)
+                    for vx, vy in tc["vertices"]
+                ]
+            else:
+                # Fallback: draw a small square at the position hint
+                cell_size = min(usable_width, usable_height) / 10
+                vertices = get_square_vertices((px, py), cell_size)
+        else:
+            # Fallback to square
+            cell_size = min(usable_width, usable_height) / 10
+            vertices = get_square_vertices((px, py), cell_size)
+
+        # Determine cell color
+        if goal_cell_id and cell_id == goal_cell_id:
+            color = COLORS["goal"]
+        else:
+            color = COLORS["background"]
+
+        # Apply partial observability dimming
+        if visible_cells is not None and cell_id not in visible_cells:
+            if explored_cells is not None and cell_id in explored_cells:
+                # Previously explored but not currently visible: dim
+                color = _dim_color(color)
+            else:
+                # Never explored: dark background
+                color = (30, 30, 30)
+
+        renderer.draw_cell_background(vertices, color)
+
+    # Calculate object/agent size
+    if tiling_name == "square":
+        obj_size = min(usable_width, usable_height) / max(tiling.width, tiling.height) * 0.7
+    elif tiling_name == "hex":
+        obj_size = min(usable_width, usable_height) / (tiling.height * 2) * 0.8
+    elif tiling_name in ("3464", "488"):
+        # Archimedean tilings: estimate size from total cell count
+        num_cells = max(len(tiling.cells), 1)
+        # Approximate: tiles_per_row ~ sqrt(num_cells * aspect_ratio)
+        tiles_per_side = max(math.sqrt(num_cells), 1)
+        obj_size = min(usable_width, usable_height) / tiles_per_side * 0.5
+    else:
+        obj_size = min(usable_width, usable_height) / (tiling.height * 3) * 0.8
+
+    # Draw objects (skip non-visible cells)
+    for obj_id, obj in state.objects.items():
+        if obj.cell_id is None:
+            continue
+        if visible_cells is not None and obj.cell_id not in visible_cells:
+            continue
+        cell = tiling.cells.get(obj.cell_id)
+        if cell is None:
+            continue
+
+        pos = cell.position_hint
+        px = offset_x + pos[0] * usable_width
+        py = offset_y + pos[1] * usable_height
+        renderer.draw_object((px, py), obj, obj_size)
+
+    # Draw goal marker (skip if not visible)
+    if goal_cell_id and goal_cell_id in tiling.cells:
+        if visible_cells is None or goal_cell_id in visible_cells:
+            goal_cell = tiling.cells[goal_cell_id]
+            pos = goal_cell.position_hint
+            px = offset_x + pos[0] * usable_width
+            py = offset_y + pos[1] * usable_height
+            renderer.draw_goal((px, py), obj_size)
+
+    # Draw agent
+    agent_cell = tiling.cells.get(state.agent.cell_id)
+    if agent_cell is not None:
+        pos = agent_cell.position_hint
+        px = offset_x + pos[0] * usable_width
+        py = offset_y + pos[1] * usable_height
+
+        # Calculate facing angle
+        num_dirs = len(tiling.directions)
+        # Facing 0 = first direction (e.g., north for hex, edge0 for triangle)
+        facing_angle = -state.agent.facing * (2 * math.pi / num_dirs)
+
+        # Adjust based on tiling orientation
+        if tiling_name == "square":
+            # Square: 0=north, 1=east, 2=south, 3=west
+            facing_angle = -math.pi / 2 - state.agent.facing * (math.pi / 2)
+        elif tiling_name == "hex":
+            # Hex: 0=north, 1=northeast, etc.
+            facing_angle = -math.pi / 2 - state.agent.facing * (math.pi / 3)
+
+        renderer.draw_agent((px, py), facing_angle, obj_size, state.agent.holding)
+
+    return renderer.end_frame()
diff --git a/src/v1_1/multigrid/test_multigrid.py b/src/v1_1/multigrid/test_multigrid.py
new file mode 100644
index 00000000..8fef4030
--- /dev/null
+++ b/src/v1_1/multigrid/test_multigrid.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python3
+"""
+Test script for the multigrid module.
+
+Tests rendering, goal system, and all tiling types.
+"""
+
+import sys
+from pathlib import Path
+import numpy as np
+
+# Ensure module can be imported
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from multigrid.env import MultiGridEnv, TilingRegistry
+from multigrid.tilings import SquareTiling, HexTiling, TriangleTiling
+from multigrid.goals import (
+    ReachPositionGoal,
+    ReachCanonicalPositionGoal,
+    CollectAllGoal,
+    create_goal_from_spec,
+)
+from multigrid.rendering import render_multigrid
+from multigrid.agent import Action
+
+
+def test_tiling_registry():
+    """Test tiling registry returns correct types."""
+    print("Testing TilingRegistry...")
+
+    square = TilingRegistry.get("square")
+    assert isinstance(square, SquareTiling), "Expected SquareTiling"
+
+    hex_tiling = TilingRegistry.get("hex")
+    assert isinstance(hex_tiling, HexTiling), "Expected HexTiling"
+
+    triangle = TilingRegistry.get("triangle")
+    assert isinstance(triangle, TriangleTiling), "Expected TriangleTiling"
+
+    print("  ✓ TilingRegistry works correctly")
+
+
+def test_square_tiling():
+    """Test square tiling basic operations."""
+    print("Testing SquareTiling...")
+
+    tiling = SquareTiling()
+    tiling.generate_graph(5, 5, seed=42)
+
+    # Check cell count
+    assert len(tiling.cells) == 25, f"Expected 25 cells, got {len(tiling.cells)}"
+
+    # Check directions
+    assert len(tiling.directions) == 4, "Square should have 4 directions"
+
+    # Check neighbor connectivity
+    center = "sq_2_2"
+    neighbors = []
+    for d in tiling.directions:
+        n = tiling.get_neighbor(center, d)
+        if n:
+            neighbors.append(n)
+    assert len(neighbors) == 4, f"Center cell should have 4 neighbors, got {len(neighbors)}"
+
+    print("  ✓ SquareTiling works correctly")
+
+
+def test_hex_tiling():
+    """Test hex tiling basic operations."""
+    print("Testing HexTiling...")
+
+    tiling = HexTiling()
+    tiling.generate_graph(3, 3, seed=42)
+
+    # Check directions
+    assert len(tiling.directions) == 6, "Hex should have 6 directions"
+
+    # Check cell count (varies with grid arrangement)
+    assert len(tiling.cells) > 0, "Should have some cells"
+
+    print(f"  ✓ HexTiling works correctly ({len(tiling.cells)} cells)")
+
+
+def test_triangle_tiling():
+    """Test triangle tiling - this was the problematic one."""
+    print("Testing TriangleTiling...")
+
+    tiling = TriangleTiling()
+    tiling.generate_graph(3, 3, seed=42)
+
+    # Check directions
+    assert len(tiling.directions) == 3, "Triangle should have 3 directions"
+
+    # Check cell count
+    assert len(tiling.cells) > 0, "Should have some cells"
+
+    # Verify all cells have some neighbors
+    for cell_id, cell in tiling.cells.items():
+        neighbor_count = sum(1 for d in tiling.directions if tiling.get_neighbor(cell_id, d))
+        # Triangles can have 1-3 neighbors depending on position
+        assert neighbor_count >= 1, f"Cell {cell_id} has no neighbors"
+
+    print(f"  ✓ TriangleTiling works correctly ({len(tiling.cells)} cells)")
+
+
+def test_goals():
+    """Test goal system."""
+    print("Testing Goal System...")
+
+    tiling = SquareTiling()
+    tiling.generate_graph(5, 5, seed=42)
+
+    # Test creating goals from spec
+    goal_spec = {
+        "type": "reach_position",
+        "target": {"x": 0.9, "y": 0.9}
+    }
+    goal = create_goal_from_spec(goal_spec, tiling)
+    assert goal is not None, "Goal should be created"
+    assert hasattr(goal, 'check'), "Goal should have check method"
+
+    # Test collect_all goal
+    collect_spec = {
+        "type": "collect_all",
+        "target_ids": ["key_1", "key_2"]
+    }
+    collect_goal = create_goal_from_spec(collect_spec, tiling)
+    assert isinstance(collect_goal, CollectAllGoal), "Should be CollectAllGoal"
+
+    print("  ✓ Goal system works correctly")
+
+
+def test_rendering():
+    """Test rendering for all tiling types."""
+    print("Testing Rendering...")
+
+    for tiling_name, tiling_class in [
+        ("square", SquareTiling),
+        ("hex", HexTiling),
+        ("triangle", TriangleTiling)
+    ]:
+        print(f"  Testing {tiling_name} rendering...")
+
+        task_spec = {
+            "task_id": f"test_{tiling_name}",
+            "seed": 42,
+            "tiling": {
+                "type": tiling_name,
+                "grid_size": {"width": 5, "height": 5}
+            },
+            "scene": {
+                "agent": {
+                    "position": {"x": 0.1, "y": 0.1},
+                    "facing": 0
+                },
+                "objects": [
+                    {
+                        "id": "box_1",
+                        "type": "movable",
+                        "color": "blue",
+                        "position": {"x": 0.5, "y": 0.5}
+                    }
+                ]
+            },
+            "goal": {
+                "type": "reach_position",
+                "target": {"x": 0.9, "y": 0.9}
+            },
+            "limits": {
+                "max_steps": 100
+            }
+        }
+
+        env = MultiGridEnv(task_spec, tiling=tiling_name, render_mode="rgb_array")
+        obs, info = env.reset()
+
+        # Check observation is valid
+        assert obs.shape == (64, 64, 3), f"Expected (64,64,3), got {obs.shape}"
+        assert obs.dtype == np.uint8, f"Expected uint8, got {obs.dtype}"
+
+        # Check it's not all black
+        assert obs.sum() > 0, "Observation should not be all black"
+
+        # Test high-res render
+        frame = env.render()
+        assert frame.shape == (640, 640, 3), f"Expected (640,640,3), got {frame.shape}"
+        assert frame.sum() > 0, "Render should not be all black"
+
+        print(f"    ✓ {tiling_name} renders correctly")
+
+    print("  ✓ All rendering works correctly")
+
+
+def test_env_step():
+    """Test environment stepping."""
+    print("Testing Environment Step...")
+
+    task_spec = {
+        "task_id": "test_step",
+        "seed": 42,
+        "tiling": {
+            "type": "square",
+            "grid_size": {"width": 5, "height": 5}
+        },
+        "scene": {
+            "agent": {
+                "position": {"x": 0.5, "y": 0.5},
+                "facing": 0
+            },
+            "objects": []
+        },
+        "goal": {
+            "type": "reach_position",
+            "target": {"x": 0.9, "y": 0.9}
+        },
+        "limits": {
+            "max_steps": 100
+        }
+    }
+
+    env = MultiGridEnv(task_spec, tiling="square", render_mode="rgb_array")
+    obs, info = env.reset()
+
+    initial_cell = env.state.agent.cell_id
+
+    # Turn right
+    obs, reward, terminated, truncated, info = env.step(Action.TURN_RIGHT.value)
+    assert not terminated, "Should not terminate from turn"
+
+    # Move forward
+    obs, reward, terminated, truncated, info = env.step(Action.FORWARD.value)
+    new_cell = env.state.agent.cell_id
+
+    # Should have moved (or stayed if blocked)
+    print(f"    Agent moved from {initial_cell} to {new_cell}")
+
+    print("  ✓ Environment stepping works correctly")
+
+
+def test_state_dict():
+    """Test state dictionary export."""
+    print("Testing State Dict Export...")
+
+    task_spec = {
+        "task_id": "test_state",
+        "seed": 42,
+        "tiling": {
+            "type": "square",
+            "grid_size": {"width": 5, "height": 5}
+        },
+        "scene": {
+            "agent": {
+                "position": {"x": 0.5, "y": 0.5},
+                "facing": 0
+            },
+            "objects": []
+        },
+        "goal": {
+            "type": "reach_position",
+            "target": {"x": 0.9, "y": 0.9}
+        },
+        "limits": {
+            "max_steps": 100
+        }
+    }
+
+    env = MultiGridEnv(task_spec, tiling="square", render_mode="state_dict")
+    env.reset()
+
+    state_dict = env.get_state_dict()
+
+    assert "agent" in state_dict, "State should have agent"
+    assert "objects" in state_dict, "State should have objects"
+    assert "step" in state_dict, "State should have step"
+    assert "goal_achieved" in state_dict, "State should have goal_achieved"
+
+    print("  ✓ State dict export works correctly")
+
+
+def run_all_tests():
+    """Run all tests."""
+    print("=" * 60)
+    print("MultiGrid Module Test Suite")
+    print("=" * 60)
+    print()
+
+    tests = [
+        test_tiling_registry,
+        test_square_tiling,
+        test_hex_tiling,
+        test_triangle_tiling,
+        test_goals,
+        test_rendering,
+        test_env_step,
+        test_state_dict,
+    ]
+
+    passed = 0
+    failed = 0
+
+    for test in tests:
+        try:
+            test()
+            passed += 1
+        except Exception as e:
+            print(f"  ✗ {test.__name__} FAILED: {e}")
+            failed += 1
+
+    print()
+    print("=" * 60)
+    print(f"Results: {passed} passed, {failed} failed")
+    print("=" * 60)
+
+    return failed == 0
+
+
+if __name__ == "__main__":
+    success = run_all_tests()
+    sys.exit(0 if success else 1)
diff --git a/src/v1_1/multigrid/tilings/__init__.py b/src/v1_1/multigrid/tilings/__init__.py
new file mode 100644
index 00000000..2c0706d4
--- /dev/null
+++ b/src/v1_1/multigrid/tilings/__init__.py
@@ -0,0 +1,15 @@
+# tilings/__init__.py
+
+from .square import SquareTiling
+from .hex import HexTiling
+from .triangle import TriangleTiling
+from .archimedean_3464 import Archimedean3464Tiling
+from .archimedean_488 import Archimedean488Tiling
+
+__all__ = [
+    'SquareTiling',
+    'HexTiling',
+    'TriangleTiling',
+    'Archimedean3464Tiling',
+    'Archimedean488Tiling',
+]
diff --git a/src/v1_1/multigrid/tilings/archimedean_3464.py b/src/v1_1/multigrid/tilings/archimedean_3464.py
new file mode 100644
index 00000000..7c5d77e5
--- /dev/null
+++ b/src/v1_1/multigrid/tilings/archimedean_3464.py
@@ -0,0 +1,394 @@
+# tilings/archimedean_3464.py
+
+"""
+Rhombitrihexagonal (3-4-6-4) Archimedean Tiling
+
+This tiling consists of regular triangles, squares, and hexagons meeting at
+each vertex in the pattern 3-4-6-4:
+  - Each hexagon is surrounded by 6 squares and 6 triangles.
+  - Each square is shared between 2 hexagons.
+  - Each triangle is shared between 3 hexagons.
+
+Construction:
+  1. Place hexagons on a lattice with translation vectors:
+       a1 = (1 + sqrt(3), 0) * s
+       a2 = ((1 + sqrt(3))/2, (3 + sqrt(3))/2) * s
+  2. For each hexagon, compute the 6 outward squares (on each edge) and
+     6 equilateral triangles (at each vertex).
+  3. Deduplicate tiles that are shared between hexagons using a vertex-
+     based key (rounded to a tolerance).
+  4. Detect adjacency by shared edges (2 shared vertices).
+"""
+
+import math
+from collections import deque
+from typing import Optional
+from ..base import Tiling
+from ..core import Cell
+
+
+# Epsilon for floating-point vertex matching
+_EPS = 1e-6
+
+# Rounding precision for deduplication keys
+_ROUND_PREC = 5
+
+
+def _centroid(verts: list[tuple[float, float]]) -> tuple[float, float]:
+    """Compute the centroid of a polygon given its vertices."""
+    n = len(verts)
+    cx = sum(v[0] for v in verts) / n
+    cy = sum(v[1] for v in verts) / n
+    return (cx, cy)
+
+
+def _vert_key(verts: list[tuple[float, float]]) -> tuple:
+    """
+    Create a hashable deduplication key from polygon vertices.
+    Sorts the rounded vertices so that the same polygon found from
+    different hexagons produces the same key.
+    """
+    rounded = tuple(sorted(
+        (round(v[0], _ROUND_PREC), round(v[1], _ROUND_PREC)) for v in verts
+    ))
+    return rounded
+
+
+def _vertices_match(v1: tuple[float, float], v2: tuple[float, float],
+                    eps: float = _EPS) -> bool:
+    """Check if two 2D points are within epsilon."""
+    return abs(v1[0] - v2[0]) < eps and abs(v1[1] - v2[1]) < eps
+
+
+def _shared_vertex_count(verts_a: list[tuple[float, float]],
+                         verts_b: list[tuple[float, float]],
+                         eps: float = _EPS) -> int:
+    """Count the number of shared vertices between two polygons."""
+    count = 0
+    for va in verts_a:
+        for vb in verts_b:
+            if _vertices_match(va, vb, eps):
+                count += 1
+    return count
+
+
+def _generate_hex_surround(hc: tuple[float, float], s: float):
+    """
+    Generate all tiles surrounding one hexagon centered at hc with edge length s.
+
+    Returns lists of (tile_type, vertices) for:
+      - 1 hexagon
+      - 6 squares (one on each hex edge)
+      - 6 triangles (one at each hex vertex)
+    """
+    hex_R = s  # circumradius of regular hexagon with edge s
+
+    # Pointy-top hexagon: first vertex at top, going clockwise
+    hverts = []
+    for i in range(6):
+        angle = math.pi / 2 - i * math.pi / 3
+        hverts.append((hc[0] + hex_R * math.cos(angle),
+                        hc[1] + hex_R * math.sin(angle)))
+
+    tiles = []
+
+    # The hexagon itself
+    tiles.append(("hexagon", list(hverts)))
+
+    # Squares on each of the 6 edges
+    square_list = []
+    for i in range(6):
+        va = hverts[i]
+        vb = hverts[(i + 1) % 6]
+        # Edge direction
+        ex, ey = vb[0] - va[0], vb[1] - va[1]
+        el = math.sqrt(ex * ex + ey * ey)
+        ed = (ex / el, ey / el)
+        # Two candidate perpendiculars
+        p1 = (-ed[1], ed[0])
+        p2 = (ed[1], -ed[0])
+        # Pick the one pointing outward from hex center
+        mid = ((va[0] + vb[0]) / 2 - hc[0], (va[1] + vb[1]) / 2 - hc[1])
+        if p1[0] * mid[0] + p1[1] * mid[1] > 0:
+            perp = p1
+        else:
+            perp = p2
+        # Square vertices: va, vb, vb + s*perp, va + s*perp
+        vc = (vb[0] + s * perp[0], vb[1] + s * perp[1])
+        vd = (va[0] + s * perp[0], va[1] + s * perp[1])
+        sq_verts = [va, vb, vc, vd]
+        tiles.append(("square", sq_verts))
+        square_list.append(sq_verts)
+
+    # Triangles at each hex vertex
+    for i in range(6):
+        prev = (i - 1) % 6
+        # Triangle at vertex i uses:
+        #   - hex vertex i
+        #   - outer vertex of square on edge (i-1), closest to vertex i
+        #     = square_list[prev][3] (the vd of that square, which was from va + perp)
+        #     Actually: square on edge prev has va=hverts[prev], vb=hverts[i]
+        #     Its outer verts are: vc (from vb=hverts[i]), vd (from va=hverts[prev])
+        #     So the outer vert near hverts[i] is vc = square_list[prev][2]
+        #   - outer vertex of square on edge i, closest to vertex i
+        #     = square_list[i][3] (the vd of that square, which was from va=hverts[i])
+        tri_verts = [hverts[i], square_list[prev][2], square_list[i][3]]
+        tiles.append(("triangle", tri_verts))
+
+    return tiles
+
+
+class Archimedean3464Tiling(Tiling):
+    """
+    Rhombitrihexagonal (3-4-6-4) Archimedean tiling.
+
+    Contains triangles (3 neighbors), squares (4 neighbors), and
+    hexagons (6 neighbors) arranged so that each vertex is surrounded
+    by a triangle, square, hexagon, square in that order.
+    """
+
+    # Maximum edge count across all tile types in the tiling
+    _MAX_EDGES = 6
+
+    def __init__(self):
+        super().__init__()
+        self._cell_list: list[str] = []
+        self._grid_cols = 0
+        self._grid_rows = 0
+
+    @property
+    def name(self) -> str:
+        return "3464"
+
+    @property
+    def directions(self) -> list[str]:
+        return [f"edge_{i}" for i in range(self._MAX_EDGES)]
+
+    def generate_graph(self, width: int, height: int, seed: int = 0
+                       ) -> dict[str, Cell]:
+        """
+        Generate the 3-4-6-4 tiling as an adjacency graph.
+
+        Places hexagons on a lattice, generates surrounding squares and
+        triangles, deduplicates shared tiles, then detects adjacency by
+        shared edges.
+
+        Args:
+            width: Number of hexagon columns in the lattice.
+            height: Number of hexagon rows in the lattice.
+            seed: Random seed (unused for deterministic tilings).
+
+        Returns:
+            Dictionary of cell_id -> Cell.
+        """
+        self.width = width
+        self.height = height
+        self._grid_cols = width
+        self._grid_rows = height
+        self.cells = {}
+
+        s = 1.0  # edge length
+
+        # Translation vectors for the hexagon lattice
+        a1 = ((1 + math.sqrt(3)) * s, 0.0)
+        a2 = (((1 + math.sqrt(3)) / 2) * s, ((3 + math.sqrt(3)) / 2) * s)
+
+        # Step 1: Generate all tiles from all hexagon positions, with dedup
+        # unique_tiles: vert_key -> {tile_type, vertices (raw)}
+        unique_tiles: dict[tuple, dict] = {}
+
+        for row in range(height):
+            for col in range(width):
+                hcx = col * a1[0] + row * a2[0]
+                hcy = col * a1[1] + row * a2[1]
+                tiles = _generate_hex_surround((hcx, hcy), s)
+                for tile_type, verts in tiles:
+                    key = _vert_key(verts)
+                    if key not in unique_tiles:
+                        unique_tiles[key] = {
+                            "tile_type": tile_type,
+                            "vertices": verts,
+                            "n_sides": len(verts),
+                        }
+
+        # Step 2: Assign cell IDs and compute raw centers
+        tile_list = []
+        counters = {"hexagon": 0, "square": 0, "triangle": 0}
+        for key, tile in unique_tiles.items():
+            tt = tile["tile_type"]
+            idx = counters[tt]
+            counters[tt] += 1
+            cell_id = f"a3464_{tt[0]}_{idx}"  # e.g., a3464_h_0, a3464_s_3, a3464_t_7
+            center = _centroid(tile["vertices"])
+            tile_list.append((cell_id, tile["tile_type"], tile["vertices"],
+                              tile["n_sides"], center))
+
+        # Step 3: Normalize all positions to [0,1]
+        all_xs = []
+        all_ys = []
+        for _, _, verts, _, _ in tile_list:
+            for vx, vy in verts:
+                all_xs.append(vx)
+                all_ys.append(vy)
+
+        min_x, max_x = min(all_xs), max(all_xs)
+        min_y, max_y = min(all_ys), max(all_ys)
+        range_x = max_x - min_x if max_x > min_x else 1.0
+        range_y = max_y - min_y if max_y > min_y else 1.0
+        scale = max(range_x, range_y)
+        if scale < _EPS:
+            scale = 1.0
+
+        def normalize(px, py):
+            nx = (px - min_x) / scale
+            ny = (py - min_y) / scale
+            offset_x = (1.0 - range_x / scale) / 2
+            offset_y = (1.0 - range_y / scale) / 2
+            return nx + offset_x, ny + offset_y
+
+        for cell_id, tile_type, verts, n_sides, center in tile_list:
+            norm_center = normalize(center[0], center[1])
+            norm_verts = [normalize(vx, vy) for vx, vy in verts]
+
+            tiling_coords = {
+                "tile_type": tile_type,
+                "vertices": norm_verts,
+                "center": norm_center,
+                "rotation": 0.0,
+                "n_sides": n_sides,
+            }
+
+            self.cells[cell_id] = Cell(
+                id=cell_id,
+                neighbors={},
+                row=0,
+                col=0,
+                position_hint=norm_center,
+                tiling_coords=tiling_coords,
+            )
+
+        self._cell_list = list(self.cells.keys())
+
+        # Step 4: Build adjacency by shared-edge detection
+        vertex_eps = 0.5 / scale  # scale epsilon to normalized space
+
+        # Spatial index: bucket vertices
+        bucket_resolution = vertex_eps * 2
+        vertex_to_cells: dict[tuple[int, int], set[str]] = {}
+
+        for cell_id in self.cells:
+            tc = self.cells[cell_id].tiling_coords
+            for vx, vy in tc["vertices"]:
+                bx = int(round(vx / bucket_resolution))
+                by = int(round(vy / bucket_resolution))
+                for dbx in [-1, 0, 1]:
+                    for dby in [-1, 0, 1]:
+                        key = (bx + dbx, by + dby)
+                        if key not in vertex_to_cells:
+                            vertex_to_cells[key] = set()
+                        vertex_to_cells[key].add(cell_id)
+
+        # Find candidate neighbor pairs
+        candidate_pairs: set[tuple[str, str]] = set()
+        for cell_id in self.cells:
+            tc = self.cells[cell_id].tiling_coords
+            neighbor_candidates: set[str] = set()
+            for vx, vy in tc["vertices"]:
+                bx = int(round(vx / bucket_resolution))
+                by = int(round(vy / bucket_resolution))
+                for cid in vertex_to_cells.get((bx, by), []):
+                    if cid != cell_id:
+                        neighbor_candidates.add(cid)
+            for cid in neighbor_candidates:
+                pair = (min(cell_id, cid), max(cell_id, cid))
+                candidate_pairs.add(pair)
+
+        # Check each candidate pair for shared edge
+        for cid_a, cid_b in candidate_pairs:
+            verts_a = self.cells[cid_a].tiling_coords["vertices"]
+            verts_b = self.cells[cid_b].tiling_coords["vertices"]
+            shared = _shared_vertex_count(verts_a, verts_b, vertex_eps)
+            if shared >= 2:
+                edge_idx_a = self._find_shared_edge_index(verts_a, verts_b, vertex_eps)
+                edge_idx_b = self._find_shared_edge_index(verts_b, verts_a, vertex_eps)
+
+                dir_a = f"edge_{edge_idx_a}"
+                dir_b = f"edge_{edge_idx_b}"
+
+                self.cells[cid_a].neighbors[dir_a] = cid_b
+                self.cells[cid_b].neighbors[dir_b] = cid_a
+
+        return self.cells
+
+    def _find_shared_edge_index(self, verts_a: list[tuple[float, float]],
+                                 verts_b: list[tuple[float, float]],
+                                 eps: float) -> int:
+        """
+        Find which edge index of polygon A is shared with polygon B.
+        An edge is (verts_a[i], verts_a[(i+1)%n]). It's shared if both
+        endpoints match vertices in verts_b.
+        """
+        n = len(verts_a)
+        for i in range(n):
+            v0 = verts_a[i]
+            v1 = verts_a[(i + 1) % n]
+            match0 = any(_vertices_match(v0, vb, eps) for vb in verts_b)
+            match1 = any(_vertices_match(v1, vb, eps) for vb in verts_b)
+            if match0 and match1:
+                return i
+        return 0  # fallback
+
+    def canonical_to_cell(self, x: float, y: float) -> str:
+        """Convert normalized [0,1] coordinates to nearest cell ID."""
+        best_id = self._cell_list[0] if self._cell_list else ""
+        best_dist = float("inf")
+
+        for cell_id, cell in self.cells.items():
+            cx, cy = cell.position_hint
+            d = (cx - x) ** 2 + (cy - y) ** 2
+            if d < best_dist:
+                best_dist = d
+                best_id = cell_id
+
+        return best_id
+
+    def cell_to_canonical(self, cell_id: str) -> tuple[float, float]:
+        """Convert cell ID to normalized [0,1] coordinates."""
+        if cell_id in self.cells:
+            return self.cells[cell_id].position_hint
+        return (0.5, 0.5)
+
+    def get_neighbor(self, cell_id: str, direction: str) -> Optional[str]:
+        """
+        Get neighbor cell ID in given direction, or None.
+
+        Directions beyond the cell's actual edge count return None.
+        For example, a triangle only uses edge_0..edge_2; edge_3..edge_5
+        return None.
+        """
+        cell = self.cells.get(cell_id)
+        if cell is None:
+            return None
+        return cell.neighbors.get(direction)
+
+    def distance(self, cell_a: str, cell_b: str) -> int:
+        """Compute graph distance (hops) between two cells using BFS."""
+        if cell_a == cell_b:
+            return 0
+        if cell_a not in self.cells or cell_b not in self.cells:
+            return 999
+
+        visited = {cell_a}
+        queue = deque([(cell_a, 0)])
+
+        while queue:
+            current, dist = queue.popleft()
+            if current == cell_b:
+                return dist
+            cell = self.cells[current]
+            for neighbor_id in cell.neighbors.values():
+                if neighbor_id not in visited:
+                    visited.add(neighbor_id)
+                    queue.append((neighbor_id, dist + 1))
+
+        return 999  # unreachable
diff --git a/src/v1_1/multigrid/tilings/archimedean_488.py b/src/v1_1/multigrid/tilings/archimedean_488.py
new file mode 100644
index 00000000..a8d45ce2
--- /dev/null
+++ b/src/v1_1/multigrid/tilings/archimedean_488.py
@@ -0,0 +1,334 @@
+# tilings/archimedean_488.py
+
+"""
+Truncated Square (4-8-8) Archimedean Tiling
+
+This tiling alternates regular octagons and squares. At every vertex,
+one square and two octagons meet (vertex configuration 4.8.8).
+
+Layout:
+  - A checkerboard grid of spacing d = s * (1 + sqrt(2)) where s is edge length.
+  - At even (row+col) positions: octagons (8 edges/neighbors).
+  - At odd (row+col) positions: squares (4 edges/neighbors).
+
+Adjacency is determined by shared-edge detection: two cells are neighbors
+if they share exactly 2 vertices (within epsilon tolerance).
+"""
+
+import math
+from collections import deque
+from typing import Optional
+from ..base import Tiling
+from ..core import Cell
+
+
+# Epsilon for floating-point vertex matching
+_EPS = 1e-6
+
+
+def _regular_polygon_vertices(center: tuple[float, float], n: int,
+                               radius: float, rotation: float = 0.0
+                               ) -> list[tuple[float, float]]:
+    """
+    Compute vertices of a regular n-gon centered at `center` with
+    circumradius `radius` and an initial rotation angle (radians).
+    """
+    cx, cy = center
+    verts = []
+    for i in range(n):
+        angle = rotation + 2 * math.pi * i / n
+        vx = cx + radius * math.cos(angle)
+        vy = cy + radius * math.sin(angle)
+        verts.append((vx, vy))
+    return verts
+
+
+def _edge_length_to_circumradius(n: int, s: float) -> float:
+    """Circumradius of a regular n-gon with edge length s."""
+    return s / (2 * math.sin(math.pi / n))
+
+
+def _vertices_match(v1: tuple[float, float], v2: tuple[float, float],
+                    eps: float = _EPS) -> bool:
+    """Check if two 2D points are within epsilon."""
+    return abs(v1[0] - v2[0]) < eps and abs(v1[1] - v2[1]) < eps
+
+
+def _shared_vertex_count(verts_a: list[tuple[float, float]],
+                         verts_b: list[tuple[float, float]],
+                         eps: float = _EPS) -> int:
+    """Count the number of shared vertices between two polygons."""
+    count = 0
+    for va in verts_a:
+        for vb in verts_b:
+            if _vertices_match(va, vb, eps):
+                count += 1
+    return count
+
+
+class Archimedean488Tiling(Tiling):
+    """
+    Truncated Square (4-8-8) Archimedean tiling.
+
+    Alternating octagons (8 neighbors) and squares (4 neighbors) on a
+    checkerboard grid.
+    """
+
+    _MAX_EDGES = 8
+
+    def __init__(self):
+        super().__init__()
+        self._cell_list: list[str] = []
+        self._grid_cols = 0
+        self._grid_rows = 0
+
+    @property
+    def name(self) -> str:
+        return "488"
+
+    @property
+    def directions(self) -> list[str]:
+        return [f"edge_{i}" for i in range(self._MAX_EDGES)]
+
+    def generate_graph(self, width: int, height: int, seed: int = 0
+                       ) -> dict[str, Cell]:
+        """
+        Generate the 4-8-8 tiling as an adjacency graph.
+
+        Args:
+            width: Number of grid columns (of the checkerboard).
+            height: Number of grid rows (of the checkerboard).
+            seed: Random seed (unused for deterministic tilings).
+
+        Returns:
+            Dictionary of cell_id -> Cell.
+        """
+        self.width = width
+        self.height = height
+        self._grid_cols = width
+        self._grid_rows = height
+        self.cells = {}
+
+        s = 1.0  # edge length
+
+        # Circumradii
+        oct_R = _edge_length_to_circumradius(8, s)
+        sq_R = _edge_length_to_circumradius(4, s)
+
+        # Apothems (center to edge midpoint)
+        oct_apothem = oct_R * math.cos(math.pi / 8)
+        sq_apothem = sq_R * math.cos(math.pi / 4)
+
+        # Grid spacing: center-to-center distance between adjacent oct and sq
+        # equals the sum of their apothems so edges align perfectly
+        d = oct_apothem + sq_apothem
+
+        # Octagon rotation: rotate by pi/8 so edges are horizontal/vertical
+        oct_rot = math.pi / 8
+
+        # Square rotation: 45 degrees so vertices point toward octagon edges
+        sq_rot = math.pi / 4
+
+        # Build all tiles
+        all_tiles = []
+
+        for row in range(height):
+            for col in range(width):
+                cx = col * d
+                cy = row * d
+                is_octagon = (row + col) % 2 == 0
+
+                if is_octagon:
+                    cell_id = f"a488_oct_{row}_{col}"
+                    verts = _regular_polygon_vertices((cx, cy), 8, oct_R, oct_rot)
+                    tile_type = "octagon"
+                    n_sides = 8
+                else:
+                    cell_id = f"a488_sq_{row}_{col}"
+                    verts = _regular_polygon_vertices((cx, cy), 4, sq_R, sq_rot)
+                    tile_type = "square"
+                    n_sides = 4
+
+                all_tiles.append({
+                    "cell_id": cell_id,
+                    "tile_type": tile_type,
+                    "center": (cx, cy),
+                    "vertices": verts,
+                    "rotation": oct_rot if is_octagon else sq_rot,
+                    "n_sides": n_sides,
+                    "grid_row": row,
+                    "grid_col": col,
+                })
+
+        # Compute bounding box for normalization
+        all_xs = []
+        all_ys = []
+        for tile in all_tiles:
+            for vx, vy in tile["vertices"]:
+                all_xs.append(vx)
+                all_ys.append(vy)
+
+        min_x, max_x = min(all_xs), max(all_xs)
+        min_y, max_y = min(all_ys), max(all_ys)
+        range_x = max_x - min_x if max_x > min_x else 1.0
+        range_y = max_y - min_y if max_y > min_y else 1.0
+
+        # Uniform scaling to preserve aspect ratio
+        scale = max(range_x, range_y)
+        if scale < _EPS:
+            scale = 1.0
+
+        def normalize(px, py):
+            nx = (px - min_x) / scale
+            ny = (py - min_y) / scale
+            offset_x = (1.0 - range_x / scale) / 2
+            offset_y = (1.0 - range_y / scale) / 2
+            return nx + offset_x, ny + offset_y
+
+        for tile in all_tiles:
+            cell_id = tile["cell_id"]
+            norm_center = normalize(tile["center"][0], tile["center"][1])
+            norm_verts = [normalize(vx, vy) for vx, vy in tile["vertices"]]
+
+            tiling_coords = {
+                "tile_type": tile["tile_type"],
+                "vertices": norm_verts,
+                "center": norm_center,
+                "rotation": tile["rotation"],
+                "n_sides": tile["n_sides"],
+            }
+
+            self.cells[cell_id] = Cell(
+                id=cell_id,
+                neighbors={},
+                row=tile["grid_row"],
+                col=tile["grid_col"],
+                position_hint=norm_center,
+                tiling_coords=tiling_coords,
+            )
+
+        self._cell_list = list(self.cells.keys())
+
+        # Build adjacency by shared-edge detection
+        vertex_eps = 0.5 / scale
+
+        # Spatial index: bucket vertices
+        bucket_resolution = vertex_eps * 2
+        vertex_to_cells: dict[tuple[int, int], list[str]] = {}
+
+        for cell_id in self.cells:
+            tc = self.cells[cell_id].tiling_coords
+            for vx, vy in tc["vertices"]:
+                bx = int(round(vx / bucket_resolution))
+                by = int(round(vy / bucket_resolution))
+                for dbx in [-1, 0, 1]:
+                    for dby in [-1, 0, 1]:
+                        key = (bx + dbx, by + dby)
+                        if key not in vertex_to_cells:
+                            vertex_to_cells[key] = []
+                        vertex_to_cells[key].append(cell_id)
+
+        # Find candidate neighbor pairs
+        candidate_pairs: set[tuple[str, str]] = set()
+        for cell_id in self.cells:
+            tc = self.cells[cell_id].tiling_coords
+            neighbor_candidates: set[str] = set()
+            for vx, vy in tc["vertices"]:
+                bx = int(round(vx / bucket_resolution))
+                by = int(round(vy / bucket_resolution))
+                for cid in vertex_to_cells.get((bx, by), []):
+                    if cid != cell_id:
+                        neighbor_candidates.add(cid)
+            for cid in neighbor_candidates:
+                pair = (min(cell_id, cid), max(cell_id, cid))
+                candidate_pairs.add(pair)
+
+        # Check each candidate pair
+        for cid_a, cid_b in candidate_pairs:
+            verts_a = self.cells[cid_a].tiling_coords["vertices"]
+            verts_b = self.cells[cid_b].tiling_coords["vertices"]
+            shared = _shared_vertex_count(verts_a, verts_b, vertex_eps)
+            if shared >= 2:
+                edge_idx_a = self._find_shared_edge_index(verts_a, verts_b, vertex_eps)
+                edge_idx_b = self._find_shared_edge_index(verts_b, verts_a, vertex_eps)
+
+                dir_a = f"edge_{edge_idx_a}"
+                dir_b = f"edge_{edge_idx_b}"
+
+                self.cells[cid_a].neighbors[dir_a] = cid_b
+                self.cells[cid_b].neighbors[dir_b] = cid_a
+
+        return self.cells
+
+    def _find_shared_edge_index(self, verts_a: list[tuple[float, float]],
+                                 verts_b: list[tuple[float, float]],
+                                 eps: float) -> int:
+        """
+        Find which edge index of polygon A is shared with polygon B.
+        An edge is (verts_a[i], verts_a[(i+1)%n]). It's shared if both
+        endpoints match vertices in verts_b.
+        """
+        n = len(verts_a)
+        for i in range(n):
+            v0 = verts_a[i]
+            v1 = verts_a[(i + 1) % n]
+            match0 = any(_vertices_match(v0, vb, eps) for vb in verts_b)
+            match1 = any(_vertices_match(v1, vb, eps) for vb in verts_b)
+            if match0 and match1:
+                return i
+        return 0  # fallback
+
+    def canonical_to_cell(self, x: float, y: float) -> str:
+        """Convert normalized [0,1] coordinates to nearest cell ID."""
+        best_id = self._cell_list[0] if self._cell_list else ""
+        best_dist = float("inf")
+
+        for cell_id, cell in self.cells.items():
+            cx, cy = cell.position_hint
+            d = (cx - x) ** 2 + (cy - y) ** 2
+            if d < best_dist:
+                best_dist = d
+                best_id = cell_id
+
+        return best_id
+
+    def cell_to_canonical(self, cell_id: str) -> tuple[float, float]:
+        """Convert cell ID to normalized [0,1] coordinates."""
+        if cell_id in self.cells:
+            return self.cells[cell_id].position_hint
+        return (0.5, 0.5)
+
+    def get_neighbor(self, cell_id: str, direction: str) -> Optional[str]:
+        """
+        Get neighbor cell ID in given direction, or None.
+
+        Directions beyond the cell's actual edge count return None.
+        For example, a square only uses edge_0..edge_3; edge_4..edge_7
+        return None.
+        """
+        cell = self.cells.get(cell_id)
+        if cell is None:
+            return None
+        return cell.neighbors.get(direction)
+
+    def distance(self, cell_a: str, cell_b: str) -> int:
+        """Compute graph distance (hops) between two cells using BFS."""
+        if cell_a == cell_b:
+            return 0
+        if cell_a not in self.cells or cell_b not in self.cells:
+            return 999
+
+        visited = {cell_a}
+        queue = deque([(cell_a, 0)])
+
+        while queue:
+            current, dist = queue.popleft()
+            if current == cell_b:
+                return dist
+            cell = self.cells[current]
+            for neighbor_id in cell.neighbors.values():
+                if neighbor_id not in visited:
+                    visited.add(neighbor_id)
+                    queue.append((neighbor_id, dist + 1))
+
+        return 999  # unreachable
diff --git a/src/v1_1/multigrid/tilings/hex.py b/src/v1_1/multigrid/tilings/hex.py
new file mode 100644
index 00000000..ea92fc3d
--- /dev/null
+++ b/src/v1_1/multigrid/tilings/hex.py
@@ -0,0 +1,293 @@
+# tilings/hex.py
+
+import math
+from dataclasses import dataclass
+from ..base import Tiling
+from ..core import Cell
+from typing import Optional
+
+
+@dataclass
+class AxialCoord:
+    """Axial coordinates for hexagonal grids."""
+    q: int
+    r: int
+
+    def __add__(self, other: "AxialCoord") -> "AxialCoord":
+        return AxialCoord(self.q + other.q, self.r + other.r)
+
+    def __sub__(self, other: "AxialCoord") -> "AxialCoord":
+        return AxialCoord(self.q - other.q, self.r - other.r)
+
+    def __hash__(self):
+        return hash((self.q, self.r))
+
+    def __eq__(self, other):
+        if not isinstance(other, AxialCoord):
+            return False
+        return self.q == other.q and self.r == other.r
+
+    @property
+    def s(self) -> int:
+        """Implicit third coordinate."""
+        return -self.q - self.r
+
+
+@dataclass
+class OffsetCoord:
+    """Offset coordinates for hexagonal grids (odd-r layout)."""
+    col: int
+    row: int
+
+
+# Direction labels (clockwise from north)
+DIRECTIONS = ["north", "northeast", "southeast", "south", "southwest", "northwest"]
+
+DIR_INDEX = {
+    "north": 0,
+    "northeast": 1,
+    "southeast": 2,
+    "south": 3,
+    "southwest": 4,
+    "northwest": 5
+}
+
+# Direction vectors in axial coordinates
+# Pointy-top hex, starting from north (up), going clockwise
+DIR_VECTORS_AXIAL = {
+    "north":     AxialCoord(0, -1),
+    "northeast": AxialCoord(1, -1),
+    "southeast": AxialCoord(1, 0),
+    "south":     AxialCoord(0, 1),
+    "southwest": AxialCoord(-1, 1),
+    "northwest": AxialCoord(-1, 0)
+}
+
+# Opposite directions
+OPPOSITE = {
+    "north": "south",
+    "northeast": "southwest",
+    "southeast": "northwest",
+    "south": "north",
+    "southwest": "northeast",
+    "northwest": "southeast"
+}
+
+
+def offset_to_axial(offset: OffsetCoord) -> AxialCoord:
+    """Convert odd-r offset to axial coordinates."""
+    q = offset.col - (offset.row - (offset.row & 1)) // 2
+    r = offset.row
+    return AxialCoord(q, r)
+
+
+def axial_to_offset(axial: AxialCoord) -> OffsetCoord:
+    """Convert axial to odd-r offset coordinates."""
+    col = axial.q + (axial.r - (axial.r & 1)) // 2
+    row = axial.r
+    return OffsetCoord(col, row)
+
+
+def axial_to_cell_id(coord: AxialCoord) -> str:
+    """Convert axial coordinates to cell ID."""
+    return f"hex_{coord.q}_{coord.r}"
+
+
+def cell_id_to_axial(cell_id: str) -> AxialCoord:
+    """Parse cell ID to axial coordinates."""
+    _, q, r = cell_id.split("_")
+    return AxialCoord(int(q), int(r))
+
+
+def axial_round(q_frac: float, r_frac: float) -> AxialCoord:
+    """Round fractional axial coordinates to nearest hex."""
+    s_frac = -q_frac - r_frac
+
+    q = round(q_frac)
+    r = round(r_frac)
+    s = round(s_frac)
+
+    q_diff = abs(q - q_frac)
+    r_diff = abs(r - r_frac)
+    s_diff = abs(s - s_frac)
+
+    # Reset the component with largest rounding error
+    if q_diff > r_diff and q_diff > s_diff:
+        q = -r - s
+    elif r_diff > s_diff:
+        r = -q - s
+    # else: s = -q - r (implicit, we don't store s)
+
+    return AxialCoord(q, r)
+
+
+def axial_distance(a: AxialCoord, b: AxialCoord) -> int:
+    """Distance in axial coordinates (derived from cube)."""
+    return (
+        abs(a.q - b.q) +
+        abs(a.q + a.r - b.q - b.r) +
+        abs(a.r - b.r)
+    ) // 2
+
+
+class HexTiling(Tiling):
+    """Hexagonal tiling implementation with pointy-top orientation."""
+
+    def __init__(self):
+        super().__init__()
+        self._bounds: set[AxialCoord] = set()
+
+    @property
+    def name(self) -> str:
+        return "hex"
+
+    @property
+    def directions(self) -> list[str]:
+        return DIRECTIONS
+
+    def generate_graph(self, width: int, height: int, seed: int = 0) -> dict[str, Cell]:
+        """
+        Generate hexagonal grid as adjacency graph.
+
+        Creates a rectangular region of hexes using offset coordinates
+        for layout, then converts to axial for math.
+
+        Args:
+            width: Number of columns
+            height: Number of rows
+            seed: Random seed (unused for regular grids)
+
+        Returns:
+            Dictionary of cell_id -> Cell
+        """
+        self.width = width
+        self.height = height
+        self.cells = {}
+        self._bounds = set()
+
+        # Create cells using offset coordinates for rectangular layout
+        for row in range(height):
+            for col in range(width):
+                offset = OffsetCoord(col, row)
+                axial = offset_to_axial(offset)
+
+                cell_id = axial_to_cell_id(axial)
+                pos = self._axial_to_normalized(axial)
+
+                self.cells[cell_id] = Cell(
+                    id=cell_id,
+                    neighbors={},
+                    row=row,
+                    col=col,
+                    position_hint=pos,
+                    tiling_coords=axial
+                )
+                self._bounds.add(axial)
+
+        # Connect neighbors
+        for cell_id, cell in self.cells.items():
+            axial = cell.tiling_coords
+            for direction, delta in DIR_VECTORS_AXIAL.items():
+                neighbor_axial = axial + delta
+                if neighbor_axial in self._bounds:
+                    neighbor_id = axial_to_cell_id(neighbor_axial)
+                    cell.neighbors[direction] = neighbor_id
+
+        return self.cells
+
+    def _axial_to_normalized(self, axial: AxialCoord) -> tuple[float, float]:
+        """Convert axial to normalized [0,1] coordinates for rendering."""
+        # Convert axial back to offset coordinates for positioning
+        offset = axial_to_offset(axial)
+        col, row = offset.col, offset.row
+
+        # For pointy-top hexagons in odd-r offset layout:
+        # - Horizontal spacing between columns: sqrt(3) * size
+        # - Vertical spacing between rows: 3/2 * size
+        # - Odd rows are offset by sqrt(3)/2 * size to the right
+
+        # Calculate size to fit grid in [0,1] space with margin
+        width_spacing = (self.width - 1) if self.width > 1 else 1
+        height_spacing = (self.height - 1) if self.height > 1 else 1
+
+        # Account for odd-row offset in horizontal extent
+        # Max horizontal extent is width * sqrt(3) * size + (for odd row) sqrt(3)/2 * size
+        # = (width + 0.5) * sqrt(3) * size
+        size_from_width = 0.95 / ((self.width + 0.5) * math.sqrt(3)) if self.width > 0 else 0.1
+        size_from_height = 0.95 / (height_spacing * 1.5) if height_spacing > 0 else 0.1
+        size = min(size_from_width, size_from_height)
+
+        # Position hex based on offset coordinates
+        x = col * math.sqrt(3) * size
+        y = row * 1.5 * size
+
+        # Odd rows are shifted right by sqrt(3)/2 * size
+        if row % 2 == 1:
+            x += math.sqrt(3) / 2 * size
+
+        # Center the grid
+        grid_width = (self.width + 0.5) * math.sqrt(3) * size
+        grid_height = (self.height - 0.5) * 1.5 * size
+
+        x_offset = (1.0 - grid_width) / 2
+        y_offset = (1.0 - grid_height) / 2
+
+        return x + x_offset, y + y_offset
+
+    def canonical_to_cell(self, x: float, y: float) -> str:
+        """Convert normalized coordinates to nearest cell ID."""
+        # Calculate size (same as in _axial_to_normalized)
+        width_spacing = (self.width - 1) if self.width > 1 else 1
+        height_spacing = (self.height - 1) if self.height > 1 else 1
+
+        size_from_width = 0.95 / ((self.width + 0.5) * math.sqrt(3)) if self.width > 0 else 0.1
+        size_from_height = 0.95 / (height_spacing * 1.5) if height_spacing > 0 else 0.1
+        size = min(size_from_width, size_from_height)
+
+        # Calculate grid offset
+        grid_width = (self.width + 0.5) * math.sqrt(3) * size
+        grid_height = (self.height - 0.5) * 1.5 * size
+        x_offset = (1.0 - grid_width) / 2
+        y_offset = (1.0 - grid_height) / 2
+
+        # Reverse the transformation
+        px = (x - x_offset) / size
+        py = (y - y_offset) / size
+
+        # Pixel to fractional offset coordinates
+        # Account for odd-row shifting
+        row_frac = py / 1.5
+        row = round(row_frac)
+
+        # If odd row, subtract the offset before calculating column
+        x_adjusted = px
+        if row % 2 == 1:
+            x_adjusted -= math.sqrt(3) / 2
+
+        col_frac = x_adjusted / math.sqrt(3)
+        col = round(col_frac)
+
+        # Clamp to valid bounds
+        col = max(0, min(self.width - 1, col))
+        row = max(0, min(self.height - 1, row))
+
+        # Convert to axial
+        offset = OffsetCoord(col, row)
+        axial = offset_to_axial(offset)
+
+        return axial_to_cell_id(axial)
+
+    def cell_to_canonical(self, cell_id: str) -> tuple[float, float]:
+        """Convert cell ID to normalized coordinates (hex center)."""
+        axial = cell_id_to_axial(cell_id)
+        return self._axial_to_normalized(axial)
+
+    def get_neighbor(self, cell_id: str, direction: str) -> Optional[str]:
+        """Get neighbor in given direction."""
+        return self.cells[cell_id].neighbors.get(direction)
+
+    def distance(self, cell_a: str, cell_b: str) -> int:
+        """Graph distance (hops) between cells."""
+        axial_a = cell_id_to_axial(cell_a)
+        axial_b = cell_id_to_axial(cell_b)
+        return axial_distance(axial_a, axial_b)
diff --git a/src/v1_1/multigrid/tilings/square.py b/src/v1_1/multigrid/tilings/square.py
new file mode 100644
index 00000000..8bcc9910
--- /dev/null
+++ b/src/v1_1/multigrid/tilings/square.py
@@ -0,0 +1,180 @@
+# tilings/square.py
+
+from ..base import Tiling
+from ..core import Cell
+from typing import Optional
+
+
+# Direction labels
+DIRECTIONS = ["north", "east", "south", "west"]
+
+# Direction index mapping
+DIR_INDEX = {
+    "north": 0,
+    "east": 1,
+    "south": 2,
+    "west": 3
+}
+
+# Direction vectors (row_delta, col_delta)
+DIR_VECTORS = {
+    "north": (-1, 0),   # Up (row decreases)
+    "east":  (0, 1),    # Right (col increases)
+    "south": (1, 0),    # Down (row increases)
+    "west":  (0, -1)    # Left (col decreases)
+}
+
+# Opposite directions (for backward movement)
+OPPOSITE = {
+    "north": "south",
+    "east": "west",
+    "south": "north",
+    "west": "east"
+}
+
+
+def row_col_to_cell_id(row: int, col: int) -> str:
+    """Convert row,col to cell ID."""
+    return f"sq_{row}_{col}"
+
+
+def cell_id_to_row_col(cell_id: str) -> tuple[int, int]:
+    """Parse cell ID to row,col."""
+    _, row, col = cell_id.split("_")
+    return int(row), int(col)
+
+
+def canonical_to_row_col(x: float, y: float, width: int, height: int) -> tuple[int, int]:
+    """
+    Convert normalized [0,1] coordinates to grid row,col.
+
+    Args:
+        x: Horizontal position [0,1]
+        y: Vertical position [0,1]
+        width: Grid width in cells
+        height: Grid height in cells
+
+    Returns:
+        (row, col) tuple
+    """
+    col = min(int(x * width), width - 1)
+    row = min(int(y * height), height - 1)
+    return row, col
+
+
+def row_col_to_canonical(row: int, col: int, width: int, height: int) -> tuple[float, float]:
+    """
+    Convert grid row,col to normalized [0,1] coordinates (cell center).
+
+    Returns:
+        (x, y) tuple with x,y in [0,1]
+    """
+    x = (col + 0.5) / width
+    y = (row + 0.5) / height
+    return x, y
+
+
+def get_neighbor(row: int, col: int, direction: str, width: int, height: int) -> Optional[tuple[int, int]]:
+    """
+    Get neighbor cell in given direction.
+
+    Args:
+        row, col: Current cell coordinates
+        direction: One of "north", "east", "south", "west"
+        width, height: Grid dimensions
+
+    Returns:
+        (new_row, new_col) or None if out of bounds
+    """
+    dr, dc = DIR_VECTORS[direction]
+    new_row = row + dr
+    new_col = col + dc
+
+    # Bounds check
+    if 0 <= new_row < height and 0 <= new_col < width:
+        return new_row, new_col
+    return None
+
+
+def manhattan_distance(row1: int, col1: int, row2: int, col2: int) -> int:
+    """
+    Manhattan (L1) distance between two cells.
+    This is the minimum number of moves without obstacles.
+    """
+    return abs(row1 - row2) + abs(col1 - col2)
+
+
+class SquareTiling(Tiling):
+    """Square tiling implementation."""
+
+    @property
+    def name(self) -> str:
+        return "square"
+
+    @property
+    def directions(self) -> list[str]:
+        return DIRECTIONS
+
+    def generate_graph(self, width: int, height: int, seed: int = 0) -> dict[str, Cell]:
+        """
+        Generate square grid as adjacency graph.
+
+        Args:
+            width: Number of columns
+            height: Number of rows
+            seed: Random seed (unused for square grids, but kept for interface)
+
+        Returns:
+            Dictionary of cell_id -> Cell
+        """
+        self.width = width
+        self.height = height
+        self.cells = {}
+
+        # Create all cells
+        for row in range(height):
+            for col in range(width):
+                cell_id = row_col_to_cell_id(row, col)
+                pos = row_col_to_canonical(row, col, width, height)
+
+                self.cells[cell_id] = Cell(
+                    id=cell_id,
+                    neighbors={},
+                    row=row,
+                    col=col,
+                    position_hint=pos
+                )
+
+        # Connect neighbors
+        for row in range(height):
+            for col in range(width):
+                cell_id = row_col_to_cell_id(row, col)
+                cell = self.cells[cell_id]
+
+                for direction in self.directions:
+                    neighbor_coords = get_neighbor(row, col, direction, width, height)
+                    if neighbor_coords:
+                        neighbor_id = row_col_to_cell_id(*neighbor_coords)
+                        cell.neighbors[direction] = neighbor_id
+
+        return self.cells
+
+    def canonical_to_cell(self, x: float, y: float) -> str:
+        """Convert normalized coordinates to cell ID."""
+        row, col = canonical_to_row_col(x, y, self.width, self.height)
+        return row_col_to_cell_id(row, col)
+
+    def cell_to_canonical(self, cell_id: str) -> tuple[float, float]:
+        """Convert cell ID to normalized coordinates (cell center)."""
+        row, col = cell_id_to_row_col(cell_id)
+        return row_col_to_canonical(row, col, self.width, self.height)
+
+    def get_neighbor(self, cell_id: str, direction: str) -> Optional[str]:
+        """Get neighbor in given direction."""
+        return self.cells[cell_id].neighbors.get(direction)
+
+    def distance(self, cell_a: str, cell_b: str) -> int:
+        """Graph distance (hops) between cells."""
+        row_a, col_a = cell_id_to_row_col(cell_a)
+        row_b, col_b = cell_id_to_row_col(cell_b)
+        return manhattan_distance(row_a, col_a, row_b, col_b)
diff --git a/src/v1_1/multigrid/tilings/triangle.py b/src/v1_1/multigrid/tilings/triangle.py
new file mode 100644
index 00000000..bb1d3bcb
--- /dev/null
+++ b/src/v1_1/multigrid/tilings/triangle.py
@@ -0,0 +1,205 @@
+# tilings/triangle.py
+
+import math
+from ..base import Tiling
+from ..core import Cell
+from typing import Optional
+from .hex import HexTiling, offset_to_axial, axial_to_offset, OffsetCoord, AxialCoord, DIR_VECTORS_AXIAL
+from .hex import DIRECTIONS as HEX_DIRECTIONS
+
+
+# Direction labels for triangular tiling
+# Each triangle has 3 edges
+DIRECTIONS = ["edge0", "edge1", "edge2"]
+
+DIR_INDEX = {
+    "edge0": 0,
+    "edge1": 1,
+    "edge2": 2
+}
+
+
+def parse_triangle_id(cell_id: str) -> tuple[int, int, int]:
+    """Parse triangle cell ID to (hex_col, hex_row, tri_index)."""
+    _, hex_col, hex_row, tri_idx = cell_id.split("_")
+    return int(hex_col), int(hex_row), int(tri_idx)
+
+
+def make_triangle_id(hex_col: int, hex_row: int, tri_index: int) -> str:
+    """Create triangle cell ID from hex position and triangle index."""
+    return f"tri_{hex_col}_{hex_row}_{tri_index}"
+
+
+class TriangleTiling(Tiling):
+    """Triangular tiling by subdividing hexagons into 6 triangles each."""
+
+    @property
+    def name(self) -> str:
+        return "triangle"
+
+    @property
+    def directions(self) -> list[str]:
+        return DIRECTIONS
+
+    def generate_graph(self, width: int, height: int, seed: int = 0) -> dict[str, Cell]:
+        """
+        Generate triangular grid by subdividing hexagons.
+
+        Each hexagon is divided into 6 triangles radiating from its center.
+        Triangles are numbered 0-5 going counterclockwise from north.
+
+        Args:
+            width: Number of hex columns
+            height: Number of hex rows
+            seed: Random seed (unused)
+
+        Returns:
+            Dictionary of cell_id -> Cell
+        """
+        self.width = width
+        self.height = height
+        self.cells = {}
+
+        # First create the underlying hex grid to get positions
+        hex_tiling = HexTiling()
+        hex_tiling.generate_graph(width, height, seed)
+
+        # For each hexagon, create 6 triangles
+        for hex_col in range(width):
+            for hex_row in range(height):
+                # Get hex center position
+                offset = OffsetCoord(hex_col, hex_row)
+                axial = offset_to_axial(offset)
+                hex_center = hex_tiling._axial_to_normalized(axial)
+
+                # Calculate hex size
+                width_spacing = (width - 1) if width > 1 else 1
+                height_spacing = (height - 1) if height > 1 else 1
+                size_from_width = 0.95 / ((width + 0.5) * math.sqrt(3))
+                size_from_height = 0.95 / (height_spacing * 1.5)
+                hex_size = min(size_from_width, size_from_height)
+
+                # Create 6 triangles for this hex
+                for tri_idx in range(6):
+                    cell_id = make_triangle_id(hex_col, hex_row, tri_idx)
+
+                    # Triangle center is 2/3 of the way from hex center to vertex
+                    angle = math.pi / 2 - tri_idx * math.pi / 3  # Start from north, go counterclockwise
+                    vertex_x = hex_center[0] + hex_size * math.cos(angle)
+                    vertex_y = hex_center[1] - hex_size * math.sin(angle)
+
+                    # Centroid is 1/3 from base (at hex center) to apex (at vertex)
+                    tri_center_x = hex_center[0] + (vertex_x - hex_center[0]) * (2/3)
+                    tri_center_y = hex_center[1] + (vertex_y - hex_center[1]) * (2/3)
+
+                    self.cells[cell_id] = Cell(
+                        id=cell_id,
+                        neighbors={},
+                        row=hex_row,
+                        col=hex_col,
+                        position_hint=(tri_center_x, tri_center_y),
+                        tiling_coords={"hex_center": hex_center, "tri_idx": tri_idx, "hex_size": hex_size}
+                    )
+
+        # Connect neighbors
+        # Within a hex: triangles share edges with adjacent triangles
+        # Between hexes: triangles share edges with triangles in adjacent hexes
+        for hex_col in range(width):
+            for hex_row in range(height):
+                for tri_idx in range(6):
+                    cell_id = make_triangle_id(hex_col, hex_row, tri_idx)
+                    cell = self.cells[cell_id]
+
+                    # edge0: counterclockwise triangle in same hex
+                    prev_tri = (tri_idx - 1) % 6
+                    neighbor_id = make_triangle_id(hex_col, hex_row, prev_tri)
+                    cell.neighbors["edge0"] = neighbor_id
+
+                    # edge1: clockwise triangle in same hex
+                    next_tri = (tri_idx + 1) % 6
+                    neighbor_id = make_triangle_id(hex_col, hex_row, next_tri)
+                    cell.neighbors["edge1"] = neighbor_id
+
+                    # edge2: triangle in adjacent hex (if it exists)
+                    # Each triangle points toward one of the 6 hex directions
+                    # Get the hex neighbor in that direction
+                    offset = OffsetCoord(hex_col, hex_row)
+                    axial = offset_to_axial(offset)
+
+                    # Direction mapping: triangle 0 points north, etc.
+                    hex_direction = HEX_DIRECTIONS[tri_idx]
+                    delta = DIR_VECTORS_AXIAL[hex_direction]
+                    neighbor_axial = axial + delta
+
+                    # Check if neighbor hex exists
+                    neighbor_offset = axial_to_offset(neighbor_axial)
+                    if 0 <= neighbor_offset.col < width and 0 <= neighbor_offset.row < height:
+                        # The outer edge of triangle tri_idx in this hex
+                        # connects to the triangle pointing back in the opposite direction
+                        opposite_tri = (tri_idx + 3) % 6
+                        neighbor_id = make_triangle_id(neighbor_offset.col, neighbor_offset.row, opposite_tri)
+                        if neighbor_id in self.cells:
+                            cell.neighbors["edge2"] = neighbor_id
+
+        return self.cells
+
+    def canonical_to_cell(self, x: float, y: float) -> str:
+        """Convert normalized coordinates to nearest triangle cell ID."""
+        # Find nearest hex first
+        hex_tiling = HexTiling()
+        hex_tiling.generate_graph(self.width, self.height)
+        hex_cell_id = hex_tiling.canonical_to_cell(x, y)
+
+        # Parse hex position from ID
+        _, hex_q, hex_r = hex_cell_id.split("_")
+        offset = axial_to_offset(AxialCoord(int(hex_q), int(hex_r)))
+        hex_col, hex_row = offset.col, offset.row
+
+        # Get hex center
+        axial = offset_to_axial(OffsetCoord(hex_col, hex_row))
+        hex_center = hex_tiling._axial_to_normalized(axial)
+
+        # Determine which triangle based on angle from hex center
+        dx = x - hex_center[0]
+        dy = y - hex_center[1]
+        angle = math.atan2(-dy, dx)  # Note: -dy because y increases downward
+
+        # Convert angle to triangle index (0-5, starting from north counterclockwise)
+        # North is at angle π/2
+        adjusted_angle = (math.pi / 2 - angle) % (2 * math.pi)
+        tri_idx = int(adjusted_angle / (math.pi / 3)) % 6
+
+        return make_triangle_id(hex_col, hex_row, tri_idx)
+
+    def cell_to_canonical(self, cell_id: str) -> tuple[float, float]:
+        """Convert cell ID to normalized coordinates (triangle center)."""
+        if cell_id in self.cells:
+            return self.cells[cell_id].position_hint
+        # Fallback
+        return (0.5, 0.5)
+
+    def get_neighbor(self, cell_id: str, direction: str) -> Optional[str]:
+        """Get neighbor in given direction."""
+        return self.cells[cell_id].neighbors.get(direction)
+
+    def distance(self, cell_a: str, cell_b: str) -> int:
+        """Graph distance (hops) between cells using BFS."""
+        if cell_a == cell_b:
+            return 0
+
+        from collections import deque
+        visited = {cell_a}
+        queue = deque([(cell_a, 0)])
+
+        while queue:
+            current, dist = queue.popleft()
+            if current == cell_b:
+                return dist
+
+            cell = self.cells[current]
+            for neighbor_id in cell.neighbors.values():
+                if neighbor_id not in visited:
+                    visited.add(neighbor_id)
+                    queue.append((neighbor_id, dist + 1))
+
+        return 999
diff --git a/src/v1_1/multigrid/visibility.py b/src/v1_1/multigrid/visibility.py
new file mode 100644
index 00000000..579bd46e
--- /dev/null
+++ b/src/v1_1/multigrid/visibility.py
@@ -0,0 +1,166 @@
+# multigrid/visibility.py
+
+"""
+BFS-based visibility computation for MultiGrid partial observability.
+
+Supports two modes:
+  - Omnidirectional (fog_of_war): all cells within radius are visible
+  - Directional (view_cone): only cells within a facing-angle cone are visible
+
+Walls, closed doors, and closed gates block visibility propagation.
+"""
+
+import math
+from typing import Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .base import Tiling
+    from .world import WorldState
+
+
+def compute_visible_cells(
+    agent_cell_id: str,
+    tiling: "Tiling",
+    world_state: "WorldState",
+    radius: int,
+    facing: Optional[int] = None,
+    cone_half_angle: float = math.pi / 2,
+) -> set[str]:
+    """
+    Compute the set of cell IDs visible from the agent's position.
+
+    Uses BFS on the adjacency graph, stopping at blocking cells (walls,
+    closed doors, closed gates). If facing is provided, an angular cone
+    filter is applied.
+
+    Args:
+        agent_cell_id: The agent's current cell ID.
+        tiling: The tiling graph.
+        world_state: Current world state (used to check blocking objects).
+        radius: Maximum BFS hop distance.
+        facing: Agent facing index (None = omnidirectional / fog_of_war).
+        cone_half_angle: Half-angle of the view cone in radians (default 90 deg).
+
+    Returns:
+        Set of visible cell IDs.
+    """
+    visible = {agent_cell_id}
+
+    # BFS frontier: (cell_id, hops_so_far)
+    frontier = [(agent_cell_id, 0)]
+    visited = {agent_cell_id}
+
+    # Pre-compute agent position and facing angle for cone filtering
+    agent_pos = None
+    facing_angle = None
+    if facing is not None:
+        agent_pos = tiling.cells[agent_cell_id].position_hint
+        facing_angle = _facing_to_angle(facing, tiling)
+
+    while frontier:
+        next_frontier = []
+        for cell_id, hops in frontier:
+            if hops >= radius:
+                continue
+
+            cell = tiling.cells.get(cell_id)
+            if cell is None:
+                continue
+
+            for _direction, neighbor_id in cell.neighbors.items():
+                if neighbor_id in visited:
+                    continue
+                visited.add(neighbor_id)
+
+                # Check if neighbor blocks visibility
+                blocking = _is_cell_blocking(neighbor_id, world_state)
+
+                # Apply cone filter if directional
+                if facing is not None and agent_pos is not None:
+                    neighbor_pos = tiling.cells[neighbor_id].position_hint
+                    if not _is_in_view_cone(agent_pos, neighbor_pos, facing_angle, cone_half_angle):
+                        continue
+
+                # The cell is visible (even blocking cells are visible themselves)
+                visible.add(neighbor_id)
+
+                # But don't propagate BFS through blocking cells
+                if not blocking:
+                    next_frontier.append((neighbor_id, hops + 1))
+
+        frontier = next_frontier
+
+    return visible
+
+
+def _facing_to_angle(facing: int, tiling: "Tiling") -> float:
+    """
+    Convert a facing direction index to an angle in radians.
+
+    Angle convention: 0 = right (+x), pi/2 = down (+y).
+    This matches the rendering coordinate system.
+
+    For square tilings: 0=N(-pi/2), 1=E(0), 2=S(pi/2), 3=W(pi)
+    For hex tilings: 0=N(-pi/2), then 60-degree increments clockwise
+    """
+    num_dirs = len(tiling.directions)
+    tiling_name = tiling.name
+
+    if tiling_name == "square":
+        # Square: 0=N, 1=E, 2=S, 3=W
+        angle_map = {0: -math.pi / 2, 1: 0.0, 2: math.pi / 2, 3: math.pi}
+        return angle_map.get(facing, 0.0)
+    elif tiling_name == "hex":
+        # Hex: 0=N, then 60-degree clockwise increments
+        return -math.pi / 2 + facing * (math.pi / 3)
+    else:
+        # Generic: evenly spaced, starting from up
+        return -math.pi / 2 + facing * (2 * math.pi / num_dirs)
+
+
+def _is_in_view_cone(
+    agent_pos: tuple[float, float],
+    cell_pos: tuple[float, float],
+    facing_angle: float,
+    half_angle: float,
+) -> bool:
+    """
+    Check whether cell_pos is within the view cone of the agent.
+
+    Uses canonical (normalized) coordinates for the angle check.
+    """
+    dx = cell_pos[0] - agent_pos[0]
+    dy = cell_pos[1] - agent_pos[1]
+
+    if abs(dx) < 1e-9 and abs(dy) < 1e-9:
+        return True  # Same position
+
+    angle_to_cell = math.atan2(dy, dx)
+    angle_diff = abs(_normalize_angle(angle_to_cell - facing_angle))
+
+    return angle_diff <= half_angle
+
+
+def _normalize_angle(angle: float) -> float:
+    """Normalize angle to [-pi, pi]."""
+    while angle > math.pi:
+        angle -= 2 * math.pi
+    while angle < -math.pi:
+        angle += 2 * math.pi
+    return angle
+
+
+def _is_cell_blocking(cell_id: str, world_state: "WorldState") -> bool:
+    """
+    Check if a cell contains an object that blocks visibility.
+
+    Blocking objects: walls, closed doors, closed gates.
+    """
+    for obj in world_state.get_all_objects_at(cell_id):
+        if obj.obj_type == "wall":
+            return True
+        if obj.obj_type == "door" and not getattr(obj, "is_open", False):
+            return True
+        if obj.obj_type == "gate" and not getattr(obj, "is_open", False):
+            return True
+    return False
diff --git a/src/v1_1/multigrid/world.py b/src/v1_1/multigrid/world.py
new file mode 100644
index 00000000..6df3c12b
--- /dev/null
+++ b/src/v1_1/multigrid/world.py
@@ -0,0 +1,485 @@
+# multigrid/world.py
+
+"""
+World State and Action Execution for MultiGrid
+
+Handles:
+- World state management (agent, objects, goals)
+- Action execution with full mechanism support
+- Object interactions (keys/doors, switches/gates, hazards, teleporters)
+"""
+
+from typing import Optional, TYPE_CHECKING
+from .agent import AgentState, Action
+from .objects.base import WorldObj, ObjectRegistry
+from .base import Tiling
+from .goals import Goal, create_goal_from_spec
+from .visibility import compute_visible_cells
+
+if TYPE_CHECKING:
+    from .goals import Goal
+
+
+class WorldState:
+    """Complete world state."""
+
+    def __init__(self, tiling: Tiling):
+        self.tiling = tiling
+        self.agent = AgentState(cell_id="", facing=0)
+        self.objects: dict[str, WorldObj] = {}  # object_id -> WorldObj
+        self.goal: Optional[Goal] = None  # Goal predicate
+        self.rules: dict = {}  # Game rules (key_consumption, etc.)
+        self.hazard_hit: bool = False  # Track if agent hit a hazard
+
+        # Partial observability state
+        self.observability_mode: str = "full"  # "full", "view_cone", "fog_of_war"
+        self.view_radius: int = 3
+        self.visible_cells: set[str] = set()
+        self.explored_cells: set[str] = set()
+
+    @classmethod
+    def from_task_spec(cls, task_spec: dict, tiling: Tiling, seed: int = 0) -> "WorldState":
+        """Create world state from task specification."""
+        # Generate tiling graph
+        grid_size = task_spec.get("tiling", {}).get("grid_size", {"width": 10, "height": 10})
+        tiling.generate_graph(grid_size["width"], grid_size["height"], seed)
+
+        state = cls(tiling)
+
+        # Store rules
+        state.rules = task_spec.get("rules", {})
+
+        # Initialize agent
+        scene = task_spec.get("scene", {})
+        agent_spec = scene.get("agent", {"position": {"x": 0.1, "y": 0.1}})
+        agent_pos = agent_spec.get("position", {"x": 0.1, "y": 0.1})
+        agent_cell = tiling.canonical_to_cell(agent_pos["x"], agent_pos["y"])
+        state.agent = AgentState(
+            cell_id=agent_cell,
+            facing=agent_spec.get("facing", 0)
+        )
+
+        # Initialize objects with type-specific parameters
+        for obj_spec in scene.get("objects", []):
+            obj = state._create_object_from_spec(obj_spec, tiling)
+            if obj:
+                state.objects[obj.id] = obj
+
+        # Initialize goal from task spec
+        goal_spec = task_spec.get("goal", {})
+        if goal_spec:
+            state.goal = create_goal_from_spec(goal_spec, tiling)
+
+        # Link switches to gates
+        state._link_switches_and_gates()
+
+        # Compute zone covered_cells
+        _compute_zone_covered_cells(state, tiling)
+
+        return state
+
+    def _create_object_from_spec(self, obj_spec: dict, tiling: Tiling) -> Optional[WorldObj]:
+        """Create an object from specification with type-specific parameters."""
+        obj_type = obj_spec.get("type", "movable")
+        obj_id = obj_spec["id"]
+        color = obj_spec.get("color", "grey")
+
+        # Build kwargs based on object type
+        kwargs = {"id": obj_id, "color": color}
+
+        if obj_type == "door":
+            kwargs["is_locked"] = obj_spec.get("is_locked", True)
+
+        elif obj_type == "switch":
+            kwargs["switch_type"] = obj_spec.get("switch_type", "toggle")
+            kwargs["controls"] = obj_spec.get("controls", [])
+            kwargs["initial_state"] = obj_spec.get("initial_state", False)
+
+        elif obj_type == "gate":
+            kwargs["is_open"] = obj_spec.get("is_open", False)
+            kwargs["controlled_by"] = obj_spec.get("controlled_by", [])
+            kwargs["require_all"] = obj_spec.get("require_all", False)
+
+        elif obj_type == "hazard":
+            kwargs["hazard_type"] = obj_spec.get("hazard_type", "lava")
+            kwargs["damage"] = obj_spec.get("damage", 1.0)
+
+        elif obj_type == "teleporter":
+            kwargs["linked_to"] = obj_spec.get("linked_to")
+            kwargs["cooldown"] = obj_spec.get("cooldown", 1)
+
+        elif obj_type == "zone":
+            kwargs["radius_hops"] = obj_spec.get("radius_hops", 1)
+
+        try:
+            obj = ObjectRegistry.create(obj_type, **kwargs)
+            obj_pos = obj_spec.get("position", {"x": 0.5, "y": 0.5})
+            obj.cell_id = tiling.canonical_to_cell(obj_pos["x"], obj_pos["y"])
+            return obj
+        except (ValueError, KeyError) as e:
+            print(f"Warning: Could not create object {obj_id}: {e}")
+            return None
+
+    def _link_switches_and_gates(self) -> None:
+        """Link switches to their controlled gates."""
+        # Build gate lookup
+        gates = {obj.id: obj for obj in self.objects.values()
+                 if obj.obj_type == "gate"}
+
+        # Link switches to gates
+        for obj in self.objects.values():
+            if obj.obj_type == "switch":
+                for gate_id in obj.controls:
+                    if gate_id in gates:
+                        gate = gates[gate_id]
+                        if obj.id not in gate.controlled_by:
+                            gate.controlled_by.append(obj.id)
+
+    def update_visibility(self) -> None:
+        """Recompute visible cells based on observability mode."""
+        if self.observability_mode == "full":
+            self.visible_cells = set(self.tiling.cells.keys())
+            self.explored_cells = set(self.tiling.cells.keys())
+        else:
+            facing = self.agent.facing if self.observability_mode == "view_cone" else None
+            self.visible_cells = compute_visible_cells(
+                self.agent.cell_id,
+                self.tiling,
+                self,
+                self.view_radius,
+                facing=facing,
+            )
+            self.explored_cells |= self.visible_cells
+
+    def can_move_to(self, cell_id: str) -> bool:
+        """Check if agent can move to cell."""
+        for obj in self.objects.values():
+            if obj.cell_id == cell_id and not obj.can_overlap():
+                return False
+        return True
+
+    def get_object_at(self, cell_id: str) -> Optional[WorldObj]:
+        """Get first non-overlappable object at cell."""
+        for obj in self.objects.values():
+            if obj.cell_id == cell_id and not obj.can_overlap():
+                return obj
+        return None
+
+    def get_all_objects_at(self, cell_id: str) -> list[WorldObj]:
+        """Get all objects at cell (including overlappable)."""
+        return [obj for obj in self.objects.values() if obj.cell_id == cell_id]
+
+    def get_objects_by_type(self, obj_type: str) -> list[WorldObj]:
+        """Get all objects of a specific type."""
+        return [obj for obj in self.objects.values() if obj.obj_type == obj_type]
+
+    def update_gate_states(self) -> None:
+        """Update all gate states based on their controlling switches."""
+        switches = {obj.id: obj for obj in self.objects.values()
+                    if obj.obj_type == "switch"}
+
+        for obj in self.objects.values():
+            if obj.obj_type == "gate":
+                if not obj.controlled_by:
+                    continue
+
+                # Check controlling switches
+                active_switches = [
+                    switches[sw_id].is_active
+                    for sw_id in obj.controlled_by
+                    if sw_id in switches
+                ]
+
+                if not active_switches:
+                    continue
+
+                if obj.require_all:
+                    obj.set_open(all(active_switches))
+                else:
+                    obj.set_open(any(active_switches))
+
+    def check_hazard_collision(self) -> bool:
+        """Check if agent is on a hazard."""
+        for obj in self.get_all_objects_at(self.agent.cell_id):
+            if obj.obj_type == "hazard":
+                self.hazard_hit = True
+                return True
+        return False
+
+    def check_teleporter(self) -> Optional[str]:
+        """Check if agent is on a teleporter and should be transported."""
+        for obj in self.get_all_objects_at(self.agent.cell_id):
+            if obj.obj_type == "teleporter" and obj.can_teleport():
+                dest_id = obj.linked_to
+                # Find destination teleporter
+                if dest_id in self.objects:
+                    dest = self.objects[dest_id]
+                    if dest.cell_id:
+                        obj.use()
+                        return dest.cell_id
+        return None
+
+    def tick_teleporters(self) -> None:
+        """Reduce cooldown on all teleporters."""
+        for obj in self.objects.values():
+            if obj.obj_type == "teleporter":
+                obj.tick()
+
+    def check_goal(self) -> bool:
+        """Check if goal is achieved."""
+        if self.goal is None:
+            return False
+        return self.goal.check(self)
+
+
+def execute_action(
+    state: WorldState,
+    action: Action,
+    tiling: Tiling
+) -> tuple[WorldState, bool, dict]:
+    """
+    Execute action and return (new_state, done, info).
+
+    Handles all mechanism interactions:
+    - Keys unlock doors of matching color
+    - Switches control gates
+    - Hazards terminate the episode
+    - Teleporters transport the agent
+
+    Returns:
+        new_state: Updated world state
+        done: Whether episode terminated
+        info: Additional information (success, invalid_action, etc.)
+    """
+    agent = state.agent
+    info = {"invalid_action": False, "action_effect": None}
+
+    if action == Action.FORWARD:
+        facing_dir = agent.get_facing_direction(tiling)
+        next_cell = tiling.get_neighbor(agent.cell_id, facing_dir)
+        if next_cell and state.can_move_to(next_cell):
+            agent.cell_id = next_cell
+            info["action_effect"] = "moved"
+        else:
+            info["invalid_action"] = True
+
+    elif action == Action.BACKWARD:
+        facing_dir = agent.get_facing_direction(tiling)
+        # Get opposite direction
+        facing_idx = tiling.directions.index(facing_dir)
+        opposite_idx = (facing_idx + len(tiling.directions) // 2) % len(tiling.directions)
+        opposite_dir = tiling.directions[opposite_idx]
+        next_cell = tiling.get_neighbor(agent.cell_id, opposite_dir)
+        if next_cell and state.can_move_to(next_cell):
+            agent.cell_id = next_cell
+            info["action_effect"] = "moved"
+        else:
+            info["invalid_action"] = True
+
+    elif action == Action.TURN_LEFT:
+        num_dirs = len(tiling.directions)
+        agent.facing = (agent.facing - 1) % num_dirs
+        info["action_effect"] = "turned"
+
+    elif action == Action.TURN_RIGHT:
+        num_dirs = len(tiling.directions)
+        agent.facing = (agent.facing + 1) % num_dirs
+        info["action_effect"] = "turned"
+
+    elif action == Action.PICKUP:
+        if agent.holding is not None:
+            info["invalid_action"] = True
+        else:
+            # Check if there's an object in the agent's cell first
+            obj = state.get_object_at(agent.cell_id)
+
+            # If not in agent's cell, check the cell in facing direction
+            if not obj:
+                facing_dir = agent.get_facing_direction(tiling)
+                target_cell = tiling.get_neighbor(agent.cell_id, facing_dir)
+                if target_cell:
+                    obj = state.get_object_at(target_cell)
+
+            if obj and obj.can_pickup():
+                agent.holding = obj
+                obj.cell_id = None  # Remove from grid
+                state.objects.pop(obj.id, None)  # Remove from objects dict
+                info["action_effect"] = "picked_up"
+                info["picked_up_type"] = obj.obj_type
+            else:
+                info["invalid_action"] = True
+
+    elif action == Action.DROP:
+        if agent.holding is None:
+            info["invalid_action"] = True
+        else:
+            # Check if current cell is free for dropping
+            if state.can_move_to(agent.cell_id):
+                # Drop object in current cell
+                dropped_obj = agent.holding
+                dropped_obj.cell_id = agent.cell_id
+                state.objects[dropped_obj.id] = dropped_obj  # Add back to objects dict
+                agent.holding = None
+                info["action_effect"] = "dropped"
+            else:
+                # Cannot drop here - cell is occupied
+                info["invalid_action"] = True
+
+    elif action == Action.PUSH:
+        facing_dir = agent.get_facing_direction(tiling)
+        target_cell = tiling.get_neighbor(agent.cell_id, facing_dir)
+        if target_cell:
+            obj = state.get_object_at(target_cell)
+            if obj and obj.can_push():
+                push_dest = tiling.get_neighbor(target_cell, facing_dir)
+                # Validate push destination
+                if push_dest is not None and state.can_move_to(push_dest):
+                    obj.cell_id = push_dest
+                    info["action_effect"] = "pushed"
+                    info["pushed_to"] = push_dest
+                else:
+                    info["invalid_action"] = True
+                    info["reason"] = "push_destination_blocked"
+            else:
+                info["invalid_action"] = True
+                info["reason"] = "nothing_to_push" if not obj else "object_not_pushable"
+        else:
+            info["invalid_action"] = True
+            info["reason"] = "no_target_cell"
+
+    elif action == Action.TOGGLE:
+        # Toggle interacts with doors (unlock) and switches (activate)
+        facing_dir = agent.get_facing_direction(tiling)
+        target_cell = tiling.get_neighbor(agent.cell_id, facing_dir)
+
+        toggled = False
+
+        if target_cell:
+            # Check for door
+            for obj in state.get_all_objects_at(target_cell):
+                if obj.obj_type == "door":
+                    if obj.is_locked:
+                        # Try to unlock with held key
+                        if agent.holding and agent.holding.obj_type == "key":
+                            if agent.holding.color == obj.color:
+                                obj.unlock()
+                                info["action_effect"] = "unlocked_door"
+                                info["door_id"] = obj.id
+                                toggled = True
+
+                                # Consume key if rules say so
+                                if state.rules.get("key_consumption", True):
+                                    agent.holding.used = True
+                                    agent.holding = None
+                                break
+                    else:
+                        # Toggle open/closed
+                        obj.toggle()
+                        info["action_effect"] = "toggled_door"
+                        info["door_open"] = obj.is_open
+                        toggled = True
+                        break
+
+                elif obj.obj_type == "switch":
+                    if obj.activate():
+                        info["action_effect"] = "activated_switch"
+                        info["switch_id"] = obj.id
+                        info["switch_active"] = obj.is_active
+                        toggled = True
+                        # Update gate states
+                        state.update_gate_states()
+                        break
+
+        # Also check current cell for switches (step-on activation)
+        if not toggled:
+            for obj in state.get_all_objects_at(agent.cell_id):
+                if obj.obj_type == "switch":
+                    if obj.activate():
+                        info["action_effect"] = "activated_switch"
+                        info["switch_id"] = obj.id
+                        info["switch_active"] = obj.is_active
+                        toggled = True
+                        state.update_gate_states()
+                        break
+
+        if not toggled:
+            info["invalid_action"] = True
+            info["reason"] = "nothing_to_toggle"
+
+    elif action == Action.WAIT:
+        info["action_effect"] = "waited"
+
+    # Post-action processing
+
+    # Check for hold-type switches (deactivate if agent left)
+    _update_hold_switches(state)
+
+    # Update gate states
+    state.update_gate_states()
+
+    # Tick teleporter cooldowns
+    state.tick_teleporters()
+
+    # Check for teleporter transport
+    teleport_dest = state.check_teleporter()
+    if teleport_dest:
+        agent.cell_id = teleport_dest
+        info["teleported_to"] = teleport_dest
+
+    # Check for hazard collision
+    if state.check_hazard_collision():
+        info["hazard_hit"] = True
+        return state, True, info  # Episode terminates on hazard
+
+    # Check goal
+    done = state.check_goal()
+
+    return state, done, info
+
+
+def _bfs_zone(tiling: Tiling, center_cell_id: str, radius: int) -> set[str]:
+    """
+    BFS from center cell up to radius hops. Returns set of cell IDs within radius.
+
+    No blocking — zones expand freely through the tiling graph.
+    """
+    covered = {center_cell_id}
+    if radius <= 0:
+        return covered
+
+    frontier = [(center_cell_id, 0)]
+    while frontier:
+        next_frontier = []
+        for cell_id, hops in frontier:
+            if hops >= radius:
+                continue
+            cell = tiling.cells.get(cell_id)
+            if cell is None:
+                continue
+            for neighbor_id in cell.neighbors.values():
+                if neighbor_id not in covered:
+                    covered.add(neighbor_id)
+                    next_frontier.append((neighbor_id, hops + 1))
+        frontier = next_frontier
+
+    return covered
+
+
+def _compute_zone_covered_cells(state: WorldState, tiling: Tiling) -> None:
+    """Compute covered_cells for every zone object in the world."""
+    for obj in state.objects.values():
+        if obj.obj_type == "zone" and obj.cell_id:
+            obj.covered_cells = _bfs_zone(tiling, obj.cell_id, obj.radius_hops)
+
+
+def _update_hold_switches(state: WorldState) -> None:
+    """Update hold-type switches based on agent position."""
+    for obj in state.objects.values():
+        if obj.obj_type == "switch" and obj.switch_type == "hold":
+            if obj.cell_id == state.agent.cell_id:
+                # Agent is on switch - activate
+                if not obj.is_active:
+                    obj.activate()
+            else:
+                # Agent left switch - deactivate
+                obj.deactivate()
diff --git a/src/v1_1/nl_domain/__init__.py b/src/v1_1/nl_domain/__init__.py
new file mode 100644
index 00000000..344250fe
--- /dev/null
+++ b/src/v1_1/nl_domain/__init__.py
@@ -0,0 +1,11 @@
+"""
+Natural Language Domain (Domain 3) for MultiNet v1.1
+
+Provides NL action parsing, NL environment wrapper, and NL model interface
+for evaluating models that produce natural language action commands.
+"""
+
+from .nl_action_parser import NLActionParser
+from .nl_env import NLGridWorldEnv
+
+__all__ = ["NLActionParser", "NLGridWorldEnv"]
diff --git a/src/v1_1/nl_domain/nl_action_parser.py b/src/v1_1/nl_domain/nl_action_parser.py
new file mode 100644
index 00000000..bfcddb63
--- /dev/null
+++ b/src/v1_1/nl_domain/nl_action_parser.py
@@ -0,0 +1,155 @@
+"""
+Natural Language Action Parser
+
+Converts natural language commands to MiniGrid action IDs.
+Uses keyword-based pattern matching with directional decomposition.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Optional
+
+try:
+    from ..gridworld.actions import MiniGridActions
+except ImportError:
+    from gridworld.actions import MiniGridActions
+
+
+# Direction to agent-relative action mappings
+# These map compass directions to required facing direction (0=right, 1=down, 2=left, 3=up)
+COMPASS_TO_FACING = {
+    "north": 3, "up": 3,
+    "south": 1, "down": 1,
+    "east": 0, "right": 0,
+    "west": 2, "left": 2,
+}
+
+# Patterns mapped to action IDs, ordered by specificity (most specific first)
+ACTION_PATTERNS: list[tuple[re.Pattern, int]] = [
+    # Movement
+    (re.compile(r"\b(go|move|walk|step)\s+(forward|ahead|straight)\b", re.I), MiniGridActions.MOVE_FORWARD),
+    (re.compile(r"\bforward\b", re.I), MiniGridActions.MOVE_FORWARD),
+    (re.compile(r"\badvance\b", re.I), MiniGridActions.MOVE_FORWARD),
+
+    # Turning
+    (re.compile(r"\bturn\s+left\b", re.I), MiniGridActions.TURN_LEFT),
+    (re.compile(r"\bturn\s+right\b", re.I), MiniGridActions.TURN_RIGHT),
+    (re.compile(r"\brotate\s+left\b", re.I), MiniGridActions.TURN_LEFT),
+    (re.compile(r"\brotate\s+right\b", re.I), MiniGridActions.TURN_RIGHT),
+    (re.compile(r"\bleft\b", re.I), MiniGridActions.TURN_LEFT),
+    (re.compile(r"\bright\b", re.I), MiniGridActions.TURN_RIGHT),
+
+    # Interaction
+    (re.compile(r"\bpick\s*up\b", re.I), MiniGridActions.PICKUP),
+    (re.compile(r"\bgrab\b", re.I), MiniGridActions.PICKUP),
+    (re.compile(r"\bcollect\b", re.I), MiniGridActions.PICKUP),
+    (re.compile(r"\btake\b", re.I), MiniGridActions.PICKUP),
+
+    (re.compile(r"\bdrop\b", re.I), MiniGridActions.DROP),
+    (re.compile(r"\bput\s+down\b", re.I), MiniGridActions.DROP),
+    (re.compile(r"\brelease\b", re.I), MiniGridActions.DROP),
+
+    (re.compile(r"\btoggle\b", re.I), MiniGridActions.TOGGLE),
+    (re.compile(r"\bopen\b", re.I), MiniGridActions.TOGGLE),
+    (re.compile(r"\bclose\b", re.I), MiniGridActions.TOGGLE),
+    (re.compile(r"\bpress\b", re.I), MiniGridActions.TOGGLE),
+    (re.compile(r"\bactivate\b", re.I), MiniGridActions.TOGGLE),
+    (re.compile(r"\bswitch\b", re.I), MiniGridActions.TOGGLE),
+    (re.compile(r"\bunlock\b", re.I), MiniGridActions.TOGGLE),
+    (re.compile(r"\binteract\b", re.I), MiniGridActions.TOGGLE),
+
+    # Wait/done
+    (re.compile(r"\bwait\b", re.I), MiniGridActions.DONE),
+    (re.compile(r"\bstay\b", re.I), MiniGridActions.DONE),
+    (re.compile(r"\bdo\s+nothing\b", re.I), MiniGridActions.DONE),
+    (re.compile(r"\bdone\b", re.I), MiniGridActions.DONE),
+    (re.compile(r"\bstop\b", re.I), MiniGridActions.DONE),
+
+    # Push (mapped to forward, since pushing is implicit on forward into block)
+    (re.compile(r"\bpush\b", re.I), MiniGridActions.MOVE_FORWARD),
+]
+
+# Compass direction patterns
+COMPASS_PATTERN = re.compile(
+    r"\b(?:go|move|walk|head)\s+(north|south|east|west|up|down|left|right)\b", re.I
+)
+
+
+class NLActionParser:
+    """
+    Parse natural language commands into MiniGrid action sequences.
+
+    Supports:
+    - Simple commands: "go forward", "turn left", "pick up", "toggle"
+    - Directional: "move north" -> decomposed to turn sequence + forward
+    - Compound: Multiple commands in one string (separated by "then" or commas)
+    """
+
+    def parse(self, command: str, agent_facing: int = 0) -> list[int]:
+        """
+        Parse a natural language command into a sequence of action IDs.
+
+        Args:
+            command: Natural language command string
+            agent_facing: Current agent facing direction (0=right, 1=down, 2=left, 3=up)
+
+        Returns:
+            List of action IDs (usually length 1, compound commands may be longer)
+        """
+        command = command.strip()
+        if not command:
+            return [MiniGridActions.DONE]
+
+        # Check for compound commands (split by "then", "and then", commas)
+        parts = re.split(r"\bthen\b|,\s*(?:and\s+)?", command, flags=re.I)
+        parts = [p.strip() for p in parts if p.strip()]
+
+        actions = []
+        for part in parts:
+            parsed = self._parse_single(part, agent_facing)
+            actions.extend(parsed)
+            # Update facing after turns for compound commands
+            for a in parsed:
+                if a == MiniGridActions.TURN_LEFT:
+                    agent_facing = (agent_facing + 3) % 4  # -1 mod 4
+                elif a == MiniGridActions.TURN_RIGHT:
+                    agent_facing = (agent_facing + 1) % 4
+
+        return actions if actions else [MiniGridActions.DONE]
+
+    def _parse_single(self, command: str, agent_facing: int) -> list[int]:
+        """Parse a single (non-compound) command."""
+        # Check for compass directions first
+        compass_match = COMPASS_PATTERN.search(command)
+        if compass_match:
+            direction = compass_match.group(1).lower()
+            target_facing = COMPASS_TO_FACING.get(direction)
+            if target_facing is not None:
+                return self._turn_sequence(agent_facing, target_facing) + [MiniGridActions.MOVE_FORWARD]
+
+        # Try pattern matching
+        for pattern, action_id in ACTION_PATTERNS:
+            if pattern.search(command):
+                return [action_id]
+
+        # Could not parse - return wait
+        return [MiniGridActions.DONE]
+
+    def _turn_sequence(self, current_facing: int, target_facing: int) -> list[int]:
+        """
+        Generate turn sequence to change from current to target facing.
+
+        Chooses the shortest rotation direction.
+        """
+        if current_facing == target_facing:
+            return []
+
+        # Calculate clockwise and counterclockwise distances
+        cw_dist = (target_facing - current_facing) % 4
+        ccw_dist = (current_facing - target_facing) % 4
+
+        if cw_dist <= ccw_dist:
+            return [MiniGridActions.TURN_RIGHT] * cw_dist
+        else:
+            return [MiniGridActions.TURN_LEFT] * ccw_dist
diff --git a/src/v1_1/nl_domain/nl_env.py b/src/v1_1/nl_domain/nl_env.py
new file mode 100644
index 00000000..4837380b
--- /dev/null
+++ b/src/v1_1/nl_domain/nl_env.py
@@ -0,0 +1,145 @@
+"""
+Natural Language GridWorld Environment
+
+Wraps any AbstractGridBackend with a text-based action space.
+Accepts NL commands, parses them to discrete actions, and executes.
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import numpy as np
+import gymnasium as gym
+from gymnasium import spaces
+
+try:
+    from ..gridworld.backends.base import AbstractGridBackend, GridState
+    from ..gridworld.backends.minigrid_backend import MiniGridBackend
+    from ..gridworld.task_spec import TaskSpecification
+except ImportError:
+    from gridworld.backends.base import AbstractGridBackend, GridState
+    from gridworld.backends.minigrid_backend import MiniGridBackend
+    from gridworld.task_spec import TaskSpecification
+from .nl_action_parser import NLActionParser
+
+
+class NLGridWorldEnv(gym.Env):
+    """
+    Natural Language GridWorld environment.
+
+    Wraps an AbstractGridBackend and accepts text action commands.
+    Parses NL commands to discrete MiniGrid actions and executes them.
+
+    Usage:
+        env = NLGridWorldEnv(task_spec)
+        obs, info = env.reset(seed=42)
+        obs, reward, terminated, truncated, info = env.step("go forward")
+        obs, reward, terminated, truncated, info = env.step("turn left then move forward")
+    """
+
+    metadata = {
+        "render_modes": ["rgb_array", "human"],
+    }
+
+    def __init__(
+        self,
+        task_spec: TaskSpecification,
+        backend: Optional[AbstractGridBackend] = None,
+        render_mode: str = "rgb_array",
+    ):
+        super().__init__()
+
+        self.task_spec = task_spec
+        self.backend = backend or MiniGridBackend(render_mode=render_mode)
+        self.parser = NLActionParser()
+        self.render_mode = render_mode
+
+        # Text action space
+        self.action_space = spaces.Text(min_length=1, max_length=256)
+
+        # Observation space (RGB image)
+        self.observation_space = spaces.Box(
+            low=0, high=255, shape=(64, 64, 3), dtype=np.uint8
+        )
+
+        # State tracking
+        self._state: Optional[GridState] = None
+        self._obs: Optional[np.ndarray] = None
+        self._nl_history: list[str] = []
+
+    def reset(
+        self,
+        seed: Optional[int] = None,
+        options: Optional[dict] = None,
+    ) -> tuple[np.ndarray, dict]:
+        """Reset environment to initial state."""
+        self.backend.configure(self.task_spec)
+        obs, state, info = self.backend.reset(seed=seed)
+        self._state = state
+        self._obs = obs
+        self._nl_history = []
+
+        info["state"] = state
+        info["mission"] = self.backend.get_mission_text()
+        return obs, info
+
+    def step(self, nl_command: str) -> tuple[np.ndarray, float, bool, bool, dict]:
+        """
+        Execute a natural language command.
+
+        The command is parsed into one or more discrete actions,
+        which are executed sequentially. The observation and reward
+        from the final action are returned.
+
+        Args:
+            nl_command: Natural language action command
+
+        Returns:
+            (observation, reward, terminated, truncated, info)
+        """
+        if self._state is None:
+            raise RuntimeError("Call reset() before step()")
+
+        self._nl_history.append(nl_command)
+
+        # Parse NL command to action sequence
+        agent_facing = self._state.agent_direction
+        actions = self.parser.parse(nl_command, agent_facing)
+
+        # Execute all parsed actions
+        total_reward = 0.0
+        terminated = False
+        truncated = False
+        obs = self._obs
+        info = {}
+
+        for action in actions:
+            if terminated or truncated:
+                break
+            obs, reward, terminated, truncated, state, info = self.backend.step(action)
+            self._state = state
+            self._obs = obs
+            total_reward += reward
+
+        info["state"] = self._state
+        info["parsed_actions"] = actions
+        info["nl_history"] = self._nl_history.copy()
+
+        return obs, total_reward, terminated, truncated, info
+
+    def render(self) -> Optional[np.ndarray]:
+        """Render current state."""
+        return self.backend.render()
+
+    def get_state(self) -> GridState:
+        """Get current grid state."""
+        return self._state
+
+    def get_nl_history(self) -> list[str]:
+        """Get history of NL commands issued."""
+        return self._nl_history.copy()
+
+    def close(self):
+        """Clean up resources."""
+        self.backend.close()
diff --git a/src/v1_1/nl_domain/nl_model_interface.py b/src/v1_1/nl_domain/nl_model_interface.py
new file mode 100644
index 00000000..d94c6b96
--- /dev/null
+++ b/src/v1_1/nl_domain/nl_model_interface.py
@@ -0,0 +1,63 @@
+"""
+Natural Language Model Interface
+
+Extends the standard ModelInterface for models that produce
+natural language action commands instead of discrete action IDs.
+"""
+
+from __future__ import annotations
+
+from abc import abstractmethod
+
+try:
+    from ..model_interface import ModelInterface, ModelInput, ModelOutput
+except ImportError:
+    from model_interface import ModelInterface, ModelInput, ModelOutput
+from .nl_action_parser import NLActionParser
+
+
+class NLModelInterface(ModelInterface):
+    """
+    Model interface for NL-based action prediction.
+
+    Models implementing this interface produce natural language commands
+    (e.g., "turn left then move forward") which are parsed to action IDs.
+
+    Subclasses must implement predict_nl() instead of predict().
+    """
+
+    def __init__(self):
+        self._parser = NLActionParser()
+
+    @abstractmethod
+    def predict_nl(self, input: ModelInput) -> str:
+        """
+        Predict a natural language action command.
+
+        Args:
+            input: ModelInput with image and context
+
+        Returns:
+            Natural language command string
+        """
+        ...
+
+    def predict(self, input: ModelInput) -> ModelOutput:
+        """
+        Predict action by generating NL command and parsing it.
+
+        This wraps predict_nl() for compatibility with the standard
+        evaluation harness.
+        """
+        nl_command = self.predict_nl(input)
+
+        # Parse NL to action sequence; use first action
+        # Agent facing defaults to 0 since we don't have it in ModelInput
+        actions = self._parser.parse(nl_command, agent_facing=0)
+        action = actions[0] if actions else 6
+
+        return ModelOutput(
+            action=action,
+            reasoning=f"NL command: {nl_command}",
+            raw_output=nl_command,
+        )
diff --git a/src/v1_1/play_task.py b/src/v1_1/play_task.py
new file mode 100644
index 00000000..e228c1c2
--- /dev/null
+++ b/src/v1_1/play_task.py
@@ -0,0 +1,709 @@
+#!/usr/bin/env python3
+"""
+Interactive MiniGrid Task Player
+
+A pygame-based interactive player for MiniGrid task JSON files.
+Load any task specification and play through it using keyboard controls.
+
+Usage:
+    python play_task.py gridworld/tasks/tier3/gates_switches_002.json
+    python play_task.py gridworld/tasks/tier1/maze_simple_001.json --record
+
+Controls:
+    Arrow Up / W    : Move forward
+    Arrow Left / A  : Turn left
+    Arrow Right / D : Turn right
+    Space           : Pick up item
+    X               : Drop item
+    T / E           : Toggle (open door, press switch)
+    Backspace       : Wait / done (no-op)
+    R               : Reset current task
+    Q / Escape      : Quit
+    1-5             : Switch to tier N (loads first task from that tier)
+    [ / ]           : Previous / next task within current tier
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Optional
+
+_SCRIPT_DIR = Path(__file__).resolve().parent
+
+# Ensure our v1_1 directory is on sys.path for gridworld imports
+_script_dir_str = str(_SCRIPT_DIR)
+if _script_dir_str not in sys.path:
+    sys.path.insert(0, _script_dir_str)
+
+import numpy as np
+
+try:
+    import pygame
+except ImportError:
+    print(
+        "Error: pygame is not installed.\n"
+        "Install it with: pip install pygame\n"
+        "  or: conda install -c conda-forge pygame"
+    )
+    sys.exit(1)
+
+from gridworld.task_spec import TaskSpecification
+from gridworld.task_parser import TaskParser
+from gridworld.backends.minigrid_backend import MiniGridBackend
+from gridworld.backends.base import GridState
+from gridworld.actions import MiniGridActions, ACTION_SHORT
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+# Window layout
+GRID_DISPLAY_SIZE = 512      # Grid rendering area (square, left side)
+INFO_PANEL_WIDTH = 320       # Info panel width (right side)
+WINDOW_HEIGHT = GRID_DISPLAY_SIZE
+WINDOW_WIDTH = GRID_DISPLAY_SIZE + INFO_PANEL_WIDTH
+
+# Colors
+COLOR_BG = (30, 30, 30)
+COLOR_PANEL_BG = (40, 40, 48)
+COLOR_TEXT = (220, 220, 220)
+COLOR_TEXT_DIM = (140, 140, 150)
+COLOR_TEXT_HIGHLIGHT = (100, 220, 130)
+COLOR_TEXT_WARNING = (255, 180, 60)
+COLOR_TEXT_ERROR = (255, 80, 80)
+COLOR_TEXT_TITLE = (180, 200, 255)
+COLOR_SEPARATOR = (70, 70, 80)
+COLOR_SUCCESS_BG = (20, 100, 40, 180)
+COLOR_FAIL_BG = (120, 20, 20, 180)
+COLOR_OVERLAY_TEXT = (255, 255, 255)
+
+# Direction labels
+DIRECTION_NAMES = {0: "East (right)", 1: "South (down)", 2: "West (left)", 3: "North (up)"}
+DIRECTION_ARROWS = {0: "->", 1: "v", 2: "<-", 3: "^"}
+
+# Key repeat settings (milliseconds)
+KEY_REPEAT_DELAY = 200
+KEY_REPEAT_INTERVAL = 100
+
+# Frame rate
+FPS = 30
+
+
+# ---------------------------------------------------------------------------
+# Task discovery: find all task JSON files organized by tier
+# ---------------------------------------------------------------------------
+
+def discover_tasks(base_dir: Path) -> dict[int, list[Path]]:
+    """
+    Scan the tasks directory and return a mapping of tier number to sorted
+    list of JSON task file paths.
+    """
+    tasks_dir = base_dir / "gridworld" / "tasks"
+    tier_tasks: dict[int, list[Path]] = {}
+
+    if not tasks_dir.exists():
+        return tier_tasks
+
+    for tier_num in range(1, 6):
+        tier_dir = tasks_dir / f"tier{tier_num}"
+        if tier_dir.exists():
+            json_files = sorted(tier_dir.glob("*.json"))
+            if json_files:
+                tier_tasks[tier_num] = json_files
+
+    return tier_tasks
+
+
+# ---------------------------------------------------------------------------
+# Interactive player
+# ---------------------------------------------------------------------------
+
+class MiniGridPlayer:
+    """
+    Pygame-based interactive player for MiniGrid task JSON files.
+    """
+
+    def __init__(self, task_path: str, record: bool = False):
+        self.base_dir = _SCRIPT_DIR
+        self.record = record
+        self.trajectory: list[dict] = []
+        self.task_path: Optional[Path] = None
+        self.task_spec: Optional[TaskSpecification] = None
+
+        # Backend for environment logic
+        self.backend = MiniGridBackend(render_mode="rgb_array")
+
+        # Discover all tier tasks for tier-switching and prev/next navigation
+        self.tier_tasks = discover_tasks(self.base_dir)
+        self.current_tier: int = 1
+        self.current_task_index: int = 0
+
+        # Episode state
+        self.state: Optional[GridState] = None
+        self.episode_done = False
+        self.episode_success = False
+        self.total_reward: float = 0.0
+        self.last_action_name: str = ""
+
+        # Pygame setup
+        pygame.init()
+        self.screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
+        pygame.display.set_caption("MiniGrid Task Player")
+        pygame.key.set_repeat(KEY_REPEAT_DELAY, KEY_REPEAT_INTERVAL)
+        self.clock = pygame.time.Clock()
+
+        # Font setup -- use a clean monospace font
+        self.font_title = self._load_font(22, bold=True)
+        self.font_main = self._load_font(16)
+        self.font_small = self._load_font(13)
+        self.font_overlay = self._load_font(48, bold=True)
+        self.font_overlay_sub = self._load_font(20)
+
+        # Load the initial task
+        self._load_task(task_path)
+
+    def _load_font(self, size: int, bold: bool = False) -> pygame.font.Font:
+        """Load a monospace font, falling back to the default if needed."""
+        # Try common monospace fonts
+        mono_names = ["DejaVu Sans Mono", "Consolas", "Courier New", "monospace"]
+        for name in mono_names:
+            path = pygame.font.match_font(name, bold=bold)
+            if path:
+                try:
+                    return pygame.font.Font(path, size)
+                except Exception:
+                    pass
+        # Fallback to pygame default
+        return pygame.font.SysFont(None, size, bold=bold)
+
+    # ------------------------------------------------------------------
+    # Task loading
+    # ------------------------------------------------------------------
+
+    def _load_task(self, path: str) -> None:
+        """Load a task JSON file and reset the environment."""
+        resolved = Path(path)
+        if not resolved.is_absolute():
+            resolved = self.base_dir / resolved
+
+        if not resolved.exists():
+            print(f"Error: task file not found: {resolved}")
+            return
+
+        self.task_path = resolved
+        self.task_spec = TaskSpecification.from_json(str(resolved))
+
+        # Update current tier and index tracking
+        self.current_tier = self.task_spec.difficulty_tier
+        if self.current_tier in self.tier_tasks:
+            try:
+                self.current_task_index = self.tier_tasks[self.current_tier].index(resolved)
+            except ValueError:
+                self.current_task_index = 0
+
+        self._reset_env()
+
+    def _reset_env(self) -> None:
+        """Reset the environment from the current task spec."""
+        if self.task_spec is None:
+            return
+
+        # Save previous trajectory if recording and it has content
+        if self.record and self.trajectory:
+            self._save_trajectory()
+
+        self.backend.configure(self.task_spec)
+        _obs, self.state, _info = self.backend.reset(seed=self.task_spec.seed)
+
+        self.episode_done = False
+        self.episode_success = False
+        self.total_reward = 0.0
+        self.last_action_name = ""
+        self.trajectory = []
+
+        if self.record:
+            self.trajectory.append({
+                "step": 0,
+                "action": None,
+                "action_name": None,
+                "state": self.state.to_dict() if self.state else {},
+            })
+
+        pygame.display.set_caption(
+            f"MiniGrid Player  |  {self.task_spec.task_id}  "
+            f"(Tier {self.task_spec.difficulty_tier})"
+        )
+
+    def _load_tier(self, tier: int) -> None:
+        """Switch to the first task in the given tier."""
+        if tier in self.tier_tasks and self.tier_tasks[tier]:
+            self.current_tier = tier
+            self.current_task_index = 0
+            self._load_task(str(self.tier_tasks[tier][0]))
+
+    def _load_adjacent_task(self, delta: int) -> None:
+        """Load the next (+1) or previous (-1) task within the current tier."""
+        if self.current_tier not in self.tier_tasks:
+            return
+        tasks = self.tier_tasks[self.current_tier]
+        if not tasks:
+            return
+        self.current_task_index = (self.current_task_index + delta) % len(tasks)
+        self._load_task(str(tasks[self.current_task_index]))
+
+    # ------------------------------------------------------------------
+    # Step execution
+    # ------------------------------------------------------------------
+
+    def _step(self, action: int) -> None:
+        """Execute a single action in the environment."""
+        if self.episode_done or self.state is None:
+            return
+
+        self.last_action_name = ACTION_SHORT.get(action, f"#{action}")
+
+        _obs, reward, terminated, truncated, self.state, _info = self.backend.step(action)
+        self.total_reward += reward
+
+        if self.record:
+            self.trajectory.append({
+                "step": self.state.step_count,
+                "action": action,
+                "action_name": self.last_action_name,
+                "reward": reward,
+                "terminated": terminated,
+                "truncated": truncated,
+                "state": self.state.to_dict(),
+            })
+
+        if terminated or truncated:
+            self.episode_done = True
+            self.episode_success = self.state.goal_reached
+
+    # ------------------------------------------------------------------
+    # Recording / trajectory saving
+    # ------------------------------------------------------------------
+
+    def _save_trajectory(self) -> None:
+        """Save the recorded trajectory to a JSON file."""
+        if not self.trajectory:
+            return
+
+        task_id = self.task_spec.task_id if self.task_spec else "unknown"
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        filename = f"trajectory_{task_id}_{timestamp}.json"
+        output_path = self.base_dir / filename
+
+        data = {
+            "task_id": task_id,
+            "task_file": str(self.task_path) if self.task_path else None,
+            "difficulty_tier": self.task_spec.difficulty_tier if self.task_spec else None,
+            "total_steps": len(self.trajectory) - 1,  # exclude initial state
+            "total_reward": self.total_reward,
+            "success": self.episode_success,
+            "episode_done": self.episode_done,
+            "trajectory": self.trajectory,
+        }
+
+        with open(output_path, "w") as f:
+            json.dump(data, f, indent=2)
+        print(f"Trajectory saved to: {output_path}")
+
+    # ------------------------------------------------------------------
+    # Rendering
+    # ------------------------------------------------------------------
+
+    def _render_grid(self) -> None:
+        """Render the MiniGrid environment onto the left side of the screen."""
+        rgb_array = self.backend.render()  # numpy ndarray (H, W, 3)
+
+        # pygame.surfarray expects (W, H, 3) so we transpose
+        # But pygame.image.frombuffer can work with (H, W, 3) directly
+        h, w, _c = rgb_array.shape
+
+        # Create a surface from the raw RGB data
+        surf = pygame.image.frombuffer(rgb_array.tobytes(), (w, h), "RGB")
+
+        # Scale to fit the display area
+        scaled = pygame.transform.smoothscale(surf, (GRID_DISPLAY_SIZE, GRID_DISPLAY_SIZE))
+        self.screen.blit(scaled, (0, 0))
+
+    def _render_info_panel(self) -> None:
+        """Render the info panel on the right side of the screen."""
+        panel_x = GRID_DISPLAY_SIZE
+        panel_rect = pygame.Rect(panel_x, 0, INFO_PANEL_WIDTH, WINDOW_HEIGHT)
+        pygame.draw.rect(self.screen, COLOR_PANEL_BG, panel_rect)
+
+        # Draw a vertical separator line
+        pygame.draw.line(
+            self.screen, COLOR_SEPARATOR,
+            (panel_x, 0), (panel_x, WINDOW_HEIGHT), 2
+        )
+
+        x = panel_x + 12
+        y = 10
+
+        # -- Title --
+        task_id = self.task_spec.task_id if self.task_spec else "No task loaded"
+        y = self._draw_text(f"Task: {task_id}", x, y, self.font_title, COLOR_TEXT_TITLE)
+        y += 2
+
+        if self.task_spec:
+            y = self._draw_text(
+                f"Tier {self.task_spec.difficulty_tier}",
+                x, y, self.font_main, COLOR_TEXT_DIM
+            )
+
+        # Separator
+        y += 4
+        pygame.draw.line(self.screen, COLOR_SEPARATOR, (x, y), (panel_x + INFO_PANEL_WIDTH - 12, y))
+        y += 8
+
+        # -- Agent State --
+        if self.state:
+            y = self._draw_text("AGENT STATE", x, y, self.font_main, COLOR_TEXT_HIGHLIGHT)
+            y += 2
+
+            pos = self.state.agent_position
+            y = self._draw_text(
+                f"Position:  ({pos[0]}, {pos[1]})",
+                x, y, self.font_main, COLOR_TEXT
+            )
+
+            dir_name = DIRECTION_NAMES.get(self.state.agent_direction, "?")
+            arrow = DIRECTION_ARROWS.get(self.state.agent_direction, "?")
+            y = self._draw_text(
+                f"Direction: {arrow} {dir_name}",
+                x, y, self.font_main, COLOR_TEXT
+            )
+
+            carrying = self.state.agent_carrying or "nothing"
+            color = COLOR_TEXT_WARNING if self.state.agent_carrying else COLOR_TEXT_DIM
+            y = self._draw_text(f"Carrying:  {carrying}", x, y, self.font_main, color)
+
+            y += 2
+            step_text = f"Steps: {self.state.step_count} / {self.state.max_steps}"
+            y = self._draw_text(step_text, x, y, self.font_main, COLOR_TEXT)
+
+            reward_text = f"Reward: {self.total_reward:.3f}"
+            y = self._draw_text(reward_text, x, y, self.font_main, COLOR_TEXT)
+
+            if self.last_action_name:
+                y = self._draw_text(
+                    f"Last action: {self.last_action_name}",
+                    x, y, self.font_main, COLOR_TEXT_DIM
+                )
+        else:
+            y = self._draw_text("No environment loaded", x, y, self.font_main, COLOR_TEXT_ERROR)
+
+        # Separator
+        y += 4
+        pygame.draw.line(self.screen, COLOR_SEPARATOR, (x, y), (panel_x + INFO_PANEL_WIDTH - 12, y))
+        y += 8
+
+        # -- Mechanism State --
+        if self.state:
+            has_mechanisms = (
+                self.state.active_switches
+                or self.state.open_gates
+                or self.state.block_positions
+                or self.state.teleporter_cooldowns
+            )
+
+            if has_mechanisms:
+                y = self._draw_text("MECHANISMS", x, y, self.font_main, COLOR_TEXT_HIGHLIGHT)
+                y += 2
+
+                if self.state.active_switches:
+                    switches_str = ", ".join(sorted(self.state.active_switches))
+                    y = self._draw_text(f"Active switches: {switches_str}", x, y, self.font_small, COLOR_TEXT_WARNING)
+
+                if self.state.open_gates:
+                    gates_str = ", ".join(sorted(self.state.open_gates))
+                    y = self._draw_text(f"Open gates: {gates_str}", x, y, self.font_small, COLOR_TEXT_HIGHLIGHT)
+
+                if self.state.block_positions:
+                    for bid, bpos in self.state.block_positions.items():
+                        y = self._draw_text(
+                            f"Block {bid}: ({bpos[0]}, {bpos[1]})",
+                            x, y, self.font_small, COLOR_TEXT
+                        )
+
+                if self.state.teleporter_cooldowns:
+                    for tid, cd in self.state.teleporter_cooldowns.items():
+                        cd_text = f"ready" if cd == 0 else f"cooldown {cd}"
+                        y = self._draw_text(
+                            f"Teleporter {tid}: {cd_text}",
+                            x, y, self.font_small, COLOR_TEXT
+                        )
+
+                y += 4
+                pygame.draw.line(
+                    self.screen, COLOR_SEPARATOR,
+                    (x, y), (panel_x + INFO_PANEL_WIDTH - 12, y)
+                )
+                y += 8
+
+        # -- Mission --
+        if self.task_spec:
+            y = self._draw_text("MISSION", x, y, self.font_main, COLOR_TEXT_HIGHLIGHT)
+            y += 2
+            mission = self.backend.get_mission_text()
+            # Word-wrap the mission text
+            y = self._draw_wrapped_text(mission, x, y, self.font_small, COLOR_TEXT, INFO_PANEL_WIDTH - 24)
+
+        # Separator
+        y += 4
+        pygame.draw.line(self.screen, COLOR_SEPARATOR, (x, y), (panel_x + INFO_PANEL_WIDTH - 12, y))
+        y += 8
+
+        # -- Task navigation --
+        if self.current_tier in self.tier_tasks:
+            tasks = self.tier_tasks[self.current_tier]
+            nav_text = f"Task {self.current_task_index + 1}/{len(tasks)} in tier {self.current_tier}"
+            y = self._draw_text(nav_text, x, y, self.font_small, COLOR_TEXT_DIM)
+            y += 4
+
+        # -- Recording indicator --
+        if self.record:
+            y = self._draw_text("REC", x, y, self.font_main, COLOR_TEXT_ERROR)
+            y += 4
+
+        # -- Controls Reference (at the bottom) --
+        controls_y = WINDOW_HEIGHT - 195
+        pygame.draw.line(
+            self.screen, COLOR_SEPARATOR,
+            (x, controls_y), (panel_x + INFO_PANEL_WIDTH - 12, controls_y)
+        )
+        controls_y += 6
+        controls_y = self._draw_text("CONTROLS", x, controls_y, self.font_main, COLOR_TEXT_HIGHLIGHT)
+        controls_y += 2
+
+        controls = [
+            ("Up / W", "Move forward"),
+            ("Left / A", "Turn left"),
+            ("Right / D", "Turn right"),
+            ("Space", "Pick up"),
+            ("X", "Drop"),
+            ("T / E", "Toggle"),
+            ("Backspace", "Wait"),
+            ("R", "Reset"),
+            ("1-5", "Switch tier"),
+            ("[ / ]", "Prev / next task"),
+            ("Q / Esc", "Quit"),
+        ]
+        for key, desc in controls:
+            controls_y = self._draw_text(
+                f"{key:>11s}  {desc}", x, controls_y, self.font_small, COLOR_TEXT_DIM
+            )
+
+    def _render_overlay(self) -> None:
+        """Render success/failure overlay when episode ends."""
+        if not self.episode_done:
+            return
+
+        # Semi-transparent overlay
+        overlay = pygame.Surface((GRID_DISPLAY_SIZE, GRID_DISPLAY_SIZE), pygame.SRCALPHA)
+        if self.episode_success:
+            overlay.fill((20, 100, 40, 160))
+            main_text = "SUCCESS!"
+            main_color = (100, 255, 130)
+        else:
+            overlay.fill((120, 20, 20, 160))
+            main_text = "FAILED"
+            main_color = (255, 100, 100)
+
+        self.screen.blit(overlay, (0, 0))
+
+        # Main text centered on the grid area
+        text_surf = self.font_overlay.render(main_text, True, main_color)
+        text_rect = text_surf.get_rect(
+            center=(GRID_DISPLAY_SIZE // 2, GRID_DISPLAY_SIZE // 2 - 20)
+        )
+        self.screen.blit(text_surf, text_rect)
+
+        # Sub text
+        if self.state:
+            sub_text = f"Steps: {self.state.step_count} / {self.state.max_steps}   Reward: {self.total_reward:.3f}"
+        else:
+            sub_text = ""
+        sub_surf = self.font_overlay_sub.render(sub_text, True, COLOR_OVERLAY_TEXT)
+        sub_rect = sub_surf.get_rect(
+            center=(GRID_DISPLAY_SIZE // 2, GRID_DISPLAY_SIZE // 2 + 30)
+        )
+        self.screen.blit(sub_surf, sub_rect)
+
+        # Hint
+        hint_text = "Press R to reset, Q to quit, [ ] to switch task"
+        hint_surf = self.font_small.render(hint_text, True, COLOR_TEXT_DIM)
+        hint_rect = hint_surf.get_rect(
+            center=(GRID_DISPLAY_SIZE // 2, GRID_DISPLAY_SIZE // 2 + 65)
+        )
+        self.screen.blit(hint_surf, hint_rect)
+
+    def _draw_text(self, text: str, x: int, y: int, font: pygame.font.Font, color: tuple) -> int:
+        """Draw a single line of text and return the y position below it."""
+        surf = font.render(text, True, color)
+        self.screen.blit(surf, (x, y))
+        return y + surf.get_height() + 2
+
+    def _draw_wrapped_text(
+        self, text: str, x: int, y: int,
+        font: pygame.font.Font, color: tuple, max_width: int
+    ) -> int:
+        """Draw word-wrapped text and return the y position below it."""
+        words = text.split()
+        lines: list[str] = []
+        current_line = ""
+        for word in words:
+            test = f"{current_line} {word}".strip()
+            if font.size(test)[0] <= max_width:
+                current_line = test
+            else:
+                if current_line:
+                    lines.append(current_line)
+                current_line = word
+        if current_line:
+            lines.append(current_line)
+
+        for line in lines:
+            y = self._draw_text(line, x, y, font, color)
+        return y
+
+    # ------------------------------------------------------------------
+    # Main loop
+    # ------------------------------------------------------------------
+
+    def run(self) -> None:
+        """Run the main event loop."""
+        running = True
+
+        while running:
+            for event in pygame.event.get():
+                if event.type == pygame.QUIT:
+                    running = False
+                    break
+
+                if event.type == pygame.KEYDOWN:
+                    action = self._handle_keydown(event)
+
+                    if action == "quit":
+                        running = False
+                        break
+                    elif action == "reset":
+                        self._reset_env()
+                    elif isinstance(action, int):
+                        self._step(action)
+
+            # Render
+            self.screen.fill(COLOR_BG)
+
+            if self.backend.env is not None:
+                self._render_grid()
+            else:
+                # No env loaded -- show placeholder
+                placeholder_surf = self.font_main.render(
+                    "No environment loaded. Press 1-5 to load a tier.",
+                    True, COLOR_TEXT_DIM
+                )
+                self.screen.blit(placeholder_surf, (20, GRID_DISPLAY_SIZE // 2))
+
+            self._render_info_panel()
+            self._render_overlay()
+
+            pygame.display.flip()
+            self.clock.tick(FPS)
+
+        # Cleanup
+        if self.record and self.trajectory:
+            self._save_trajectory()
+
+        self.backend.close()
+        pygame.quit()
+
+    def _handle_keydown(self, event: pygame.event.Event) -> Optional[int | str]:
+        """
+        Map a pygame KEYDOWN event to an action integer, or a control string
+        ('quit', 'reset'), or None if not mapped.
+        """
+        key = event.key
+
+        # Quit
+        if key in (pygame.K_q, pygame.K_ESCAPE):
+            return "quit"
+
+        # Reset
+        if key == pygame.K_r:
+            return "reset"
+
+        # Tier switching (number keys 1-5)
+        if key in (pygame.K_1, pygame.K_2, pygame.K_3, pygame.K_4, pygame.K_5):
+            tier = key - pygame.K_0
+            self._load_tier(tier)
+            return None
+
+        # Task navigation
+        if key == pygame.K_LEFTBRACKET:
+            self._load_adjacent_task(-1)
+            return None
+        if key == pygame.K_RIGHTBRACKET:
+            self._load_adjacent_task(1)
+            return None
+
+        # If episode is done, ignore action keys (must reset first)
+        if self.episode_done:
+            return None
+
+        # Movement and interaction
+        if key in (pygame.K_UP, pygame.K_w):
+            return MiniGridActions.MOVE_FORWARD   # 2
+        if key in (pygame.K_LEFT, pygame.K_a):
+            return MiniGridActions.TURN_LEFT      # 0
+        if key in (pygame.K_RIGHT, pygame.K_d):
+            return MiniGridActions.TURN_RIGHT     # 1
+        if key == pygame.K_SPACE:
+            return MiniGridActions.PICKUP          # 3
+        if key == pygame.K_x:
+            return MiniGridActions.DROP            # 4
+        if key in (pygame.K_t, pygame.K_e):
+            return MiniGridActions.TOGGLE          # 5
+        if key == pygame.K_BACKSPACE:
+            return MiniGridActions.DONE            # 6
+
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Interactive MiniGrid task player",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    parser.add_argument(
+        "task_file",
+        nargs="?",
+        default="gridworld/tasks/tier1/maze_simple_001.json",
+        help="Path to a task JSON file (default: tier1 simple maze)",
+    )
+    parser.add_argument(
+        "--record",
+        action="store_true",
+        help="Record trajectory to a JSON file on exit or task switch",
+    )
+    args = parser.parse_args()
+
+    player = MiniGridPlayer(task_path=args.task_file, record=args.record)
+    player.run()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/v1_1/run_eval.py b/src/v1_1/run_eval.py
new file mode 100644
index 00000000..fffc81e7
--- /dev/null
+++ b/src/v1_1/run_eval.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python3
+"""
+MultiNet v1.1 Evaluation CLI
+
+Evaluate models on MiniGrid tasks across tiers 1-5.
+
+Usage:
+    python run_eval.py --model random --tier all
+    python run_eval.py --model random --tier 1
+    python run_eval.py --model ollama --ollama-model qwen2.5vl:7b --tier 1-3
+    python run_eval.py --model file_based --work-dir /tmp/eval --tier 1
+    python run_eval.py --model pi0 --device cuda:0 --tier all
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from pathlib import Path
+from typing import Optional
+
+
+def parse_tiers(tier_str: str) -> list[int]:
+    """Parse tier specification: 'all', '1', '1-3', '2,4,5'."""
+    if tier_str.lower() == "all":
+        return [1, 2, 3, 4, 5]
+    if "-" in tier_str:
+        start, end = tier_str.split("-")
+        return list(range(int(start), int(end) + 1))
+    if "," in tier_str:
+        return [int(t.strip()) for t in tier_str.split(",")]
+    return [int(tier_str)]
+
+
+def load_model(args) -> "ModelInterface":
+    """Load model based on CLI arguments."""
+    from model_interface import ModelInterface, RandomModelInterface, FileBasedModelInterface
+
+    model_name = args.model.lower()
+
+    if model_name == "random":
+        return RandomModelInterface(seed=args.seed)
+
+    elif model_name == "file_based":
+        if not args.work_dir:
+            raise ValueError("--work-dir required for file_based model")
+        model = FileBasedModelInterface(work_dir=args.work_dir, timeout=args.timeout)
+        model.setup()
+        return model
+
+    elif model_name == "ollama":
+        from adapters.ollama_vlm_adapter import OllamaVLMAdapter
+        model = OllamaVLMAdapter(
+            model=args.ollama_model or "qwen2.5vl:7b",
+            base_url=args.ollama_url or "http://localhost:11434",
+        )
+        return model
+
+    elif model_name == "lmstudio":
+        from adapters.lmstudio_vlm_adapter import LMStudioVLMAdapter
+        model = LMStudioVLMAdapter(
+            model=args.ollama_model or "qwen2.5-vl-7b",
+            base_url=args.ollama_url or "http://localhost:1234",
+        )
+        return model
+
+    elif model_name == "pi0":
+        sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "eval" / "profiling" / "openpi" / "scripts"))
+        from minigrid_inference import Pi0MiniGridAdapter
+        model = Pi0MiniGridAdapter()
+        model.setup(device=args.device)
+        return model
+
+    elif model_name == "magma":
+        sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "v1" / "modules" / "Magma" / "scripts"))
+        from magma_minigrid_inference import MagmaMiniGridAdapter
+        model = MagmaMiniGridAdapter()
+        model.setup(device=args.device)
+        return model
+
+    elif model_name == "paligemma":
+        from adapters.paligemma_adapter import PaliGemmaMiniGridAdapter
+        model = PaliGemmaMiniGridAdapter()
+        model.setup(device=args.device)
+        return model
+
+    else:
+        raise ValueError(f"Unknown model: {model_name}. Options: random, file_based, ollama, lmstudio, pi0, magma, paligemma")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="MultiNet v1.1 Evaluation CLI")
+    parser.add_argument("--model", required=True,
+                        help="Model to evaluate: random, file_based, ollama, lmstudio, pi0, magma, paligemma")
+    parser.add_argument("--tier", default="all",
+                        help="Tier(s) to evaluate: 'all', '1', '1-3', '2,4,5'")
+    parser.add_argument("--backend", default="minigrid",
+                        choices=["minigrid", "multigrid"],
+                        help="Grid backend: minigrid (square) or multigrid (exotic tilings)")
+    parser.add_argument("--tiling", default="square",
+                        help="Tiling type for multigrid backend (default: square)")
+    parser.add_argument("--action-mode", default="discrete",
+                        choices=["discrete", "nl"],
+                        help="Action mode: discrete (int actions) or nl (natural language)")
+    parser.add_argument("--device", default="cpu",
+                        help="Device for model inference (default: cpu)")
+    parser.add_argument("--seed", type=int, default=42,
+                        help="Random seed (default: 42)")
+    parser.add_argument("--task-dir", default=None,
+                        help="Task directory (default: gridworld/tasks relative to this file)")
+    parser.add_argument("--output", default=None,
+                        help="Output JSON path for results")
+    parser.add_argument("--verbose", "-v", action="store_true",
+                        help="Print step-by-step info")
+
+    # Model-specific args
+    parser.add_argument("--ollama-model", default=None,
+                        help="Ollama model name (default: qwen2.5vl:7b)")
+    parser.add_argument("--ollama-url", default=None,
+                        help="Ollama API base URL")
+    parser.add_argument("--work-dir", default=None,
+                        help="Working directory for file_based model")
+    parser.add_argument("--timeout", type=float, default=60.0,
+                        help="Timeout for file_based model (seconds)")
+
+    args = parser.parse_args()
+
+    # Resolve task directory
+    if args.task_dir is None:
+        task_dir = str(Path(__file__).resolve().parent / "gridworld" / "tasks")
+    else:
+        task_dir = args.task_dir
+
+    tiers = parse_tiers(args.tier)
+
+    print(f"Model: {args.model}")
+    print(f"Backend: {args.backend}" + (f" ({args.tiling})" if args.backend == "multigrid" else ""))
+    print(f"Action mode: {args.action_mode}")
+    print(f"Tiers: {tiers}")
+    print(f"Task dir: {task_dir}")
+    print(f"Device: {args.device}")
+    print()
+
+    # Load model
+    model = load_model(args)
+    print(f"Loaded model: {model.model_name}")
+
+    # Create backend
+    from gridworld.backends import get_backend
+    if args.backend == "multigrid":
+        backend = get_backend("multigrid", tiling=args.tiling, render_mode="rgb_array")
+    else:
+        backend = get_backend("minigrid", render_mode="rgb_array")
+
+    # Run evaluation
+    from evaluation_harness import EvaluationHarness
+    harness = EvaluationHarness(model, backend=backend)
+
+    try:
+        result = harness.evaluate_all(
+            task_dir=task_dir,
+            tiers=tiers,
+            verbose=args.verbose,
+        )
+
+        # Print results
+        print("\n" + "=" * 60)
+        print(f"RESULTS: {result.model_name}")
+        print("=" * 60)
+
+        for tier, metrics in sorted(result.tier_metrics.items()):
+            print(f"\nTier {tier}:")
+            print(f"  Tasks: {metrics.num_tasks}")
+            print(f"  Success: {metrics.num_success}/{metrics.num_tasks} ({metrics.success_rate:.1%})")
+            print(f"  Avg Steps: {metrics.avg_steps:.1f}")
+            print(f"  Avg Reward: {metrics.avg_reward:.3f}")
+
+            for r in metrics.results:
+                status = "PASS" if r.success else "FAIL"
+                print(f"    [{status}] {r.task_id}: steps={r.steps_taken}, reward={r.total_reward:.3f}")
+
+        print(f"\nOverall:")
+        print(f"  Success Rate: {result.overall_success_rate:.1%}")
+        print(f"  Avg Steps: {result.overall_avg_steps:.1f}")
+        print(f"  Avg Reward: {result.overall_avg_reward:.3f}")
+
+        # Save results
+        if args.output:
+            result.save(args.output)
+            print(f"\nResults saved to {args.output}")
+        else:
+            # Default output path
+            output_path = Path(task_dir).parent / "results" / f"{model.model_name}_results.json"
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            result.save(str(output_path))
+            print(f"\nResults saved to {output_path}")
+
+    finally:
+        harness.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/v1_1/tests/test_actions.py b/src/v1_1/tests/test_actions.py
new file mode 100644
index 00000000..1b0b13a0
--- /dev/null
+++ b/src/v1_1/tests/test_actions.py
@@ -0,0 +1,104 @@
+# test_actions.py
+
+import pytest
+import sys
+import os
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from multigrid.env import MultiGridEnv, Action
+
+
+class TestActions:
+    """Tests for action execution."""
+
+    @pytest.fixture
+    def simple_task(self):
+        """Simple task spec for testing."""
+        return {
+            "task_id": "test_001",
+            "seed": 42,
+            "scene": {
+                "bounds": {"width": 1.0, "height": 1.0},
+                "objects": [
+                    {
+                        "id": "cube_red",
+                        "type": "movable",
+                        "color": "red",
+                        "position": {"x": 0.5, "y": 0.5},
+                        "size": 0.1
+                    }
+                ],
+                "agent": {
+                    "position": {"x": 0.2, "y": 0.2},
+                    "facing": 0
+                }
+            },
+            "goal": {
+                "predicate": "object_in_zone",
+                "object_id": "cube_red",
+                "zone_id": "zone_blue"
+            },
+            "limits": {"max_steps": 100},
+            "tiling": {"type": "square", "grid_size": {"width": 10, "height": 10}}
+        }
+
+    def test_forward_movement(self, simple_task):
+        """Agent moves forward in facing direction."""
+        env = MultiGridEnv(simple_task, tiling="square")
+        obs, info = env.reset(seed=42)
+
+        initial_cell = env.state.agent.cell_id
+        initial_facing = env.state.agent.facing
+
+        obs, reward, term, trunc, info = env.step(Action.FORWARD)
+
+        # Agent should have moved
+        assert env.state.agent.cell_id != initial_cell or info.get("invalid_action")
+
+    def test_turn_changes_facing(self, simple_task):
+        """Turn actions change facing without moving."""
+        env = MultiGridEnv(simple_task, tiling="square")
+        env.reset(seed=42)
+
+        initial_cell = env.state.agent.cell_id
+        initial_facing = env.state.agent.facing
+
+        env.step(Action.TURN_RIGHT)
+
+        assert env.state.agent.cell_id == initial_cell  # Didn't move
+        assert env.state.agent.facing == (initial_facing + 1) % 4  # Facing changed
+
+    def test_invalid_move_into_wall(self, simple_task):
+        """Moving into boundary returns invalid_action."""
+        # Modify task to put agent at corner facing wall
+        simple_task["scene"]["agent"]["position"] = {"x": 0.05, "y": 0.05}
+        simple_task["scene"]["agent"]["facing"] = 0  # Facing north (into wall)
+
+        env = MultiGridEnv(simple_task, tiling="square")
+        env.reset(seed=42)
+
+        obs, reward, term, trunc, info = env.step(Action.FORWARD)
+
+        assert info.get("invalid_action") == True
+
+    def test_pickup_object(self, simple_task):
+        """Agent can pick up adjacent objects."""
+        # Position agent next to object
+        simple_task["scene"]["agent"]["position"] = {"x": 0.4, "y": 0.5}
+        simple_task["scene"]["agent"]["facing"] = 1  # Facing east (toward object)
+
+        env = MultiGridEnv(simple_task, tiling="square")
+        env.reset(seed=42)
+
+        assert env.state.agent.holding is None
+
+        # Move forward to object's cell
+        env.step(Action.FORWARD)
+
+        # Pick up
+        env.step(Action.PICKUP)
+
+        assert env.state.agent.holding is not None
+        assert env.state.agent.holding.id == "cube_red"
diff --git a/src/v1_1/tests/test_coordinates.py b/src/v1_1/tests/test_coordinates.py
new file mode 100644
index 00000000..0848d818
--- /dev/null
+++ b/src/v1_1/tests/test_coordinates.py
@@ -0,0 +1,64 @@
+# test_coordinates.py
+
+import pytest
+import math
+import sys
+import os
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from multigrid.tilings.square import SquareTiling
+from multigrid.tilings.hex import HexTiling
+from multigrid.tilings.triangle import TriangleTiling
+
+
+class TestCoordinateConversion:
+    """Tests for canonical <-> cell coordinate conversion."""
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_canonical_roundtrip_center(self, tiling_class):
+        """Converting to cell and back gives approximately same position."""
+        tiling = tiling_class()
+        tiling.generate_graph(10, 10, seed=0)
+
+        # Test center of grid
+        x, y = 0.5, 0.5
+        cell_id = tiling.canonical_to_cell(x, y)
+        x2, y2 = tiling.cell_to_canonical(cell_id)
+
+        # Should be within half a cell width
+        assert abs(x - x2) < 0.15
+        assert abs(y - y2) < 0.15
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_canonical_corners(self, tiling_class):
+        """Corner positions map to boundary cells."""
+        tiling = tiling_class()
+        tiling.generate_graph(10, 10, seed=0)
+
+        corners = [(0.01, 0.01), (0.99, 0.01), (0.01, 0.99), (0.99, 0.99)]
+
+        for x, y in corners:
+            cell_id = tiling.canonical_to_cell(x, y)
+            assert cell_id in tiling.cells, f"Corner ({x},{y}) mapped to invalid cell"
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_cell_positions_unique(self, tiling_class):
+        """Each cell has a unique canonical position."""
+        tiling = tiling_class()
+        tiling.generate_graph(10, 10, seed=0)
+
+        positions = set()
+        for cell_id in tiling.cells:
+            pos = tiling.cell_to_canonical(cell_id)
+            # Round to avoid floating point issues
+            pos_rounded = (round(pos[0], 6), round(pos[1], 6))
+            assert pos_rounded not in positions, f"Duplicate position for {cell_id}"
+            positions.add(pos_rounded)
diff --git a/src/v1_1/tests/test_distance.py b/src/v1_1/tests/test_distance.py
new file mode 100644
index 00000000..7d9fa712
--- /dev/null
+++ b/src/v1_1/tests/test_distance.py
@@ -0,0 +1,67 @@
+# test_distance.py
+
+import pytest
+import sys
+import os
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from multigrid.tilings.square import SquareTiling
+from multigrid.tilings.hex import HexTiling
+from multigrid.tilings.triangle import TriangleTiling
+
+
+class TestDistance:
+    """Tests for distance computation."""
+
+    def test_square_manhattan_distance(self):
+        """Square grid distance equals Manhattan distance."""
+        tiling = SquareTiling()
+        tiling.generate_graph(10, 10, seed=0)
+
+        # Cells 3 apart horizontally
+        d = tiling.distance("sq_5_2", "sq_5_5")
+        assert d == 3
+
+        # Cells 2 apart vertically
+        d = tiling.distance("sq_3_5", "sq_5_5")
+        assert d == 2
+
+        # Diagonal: Manhattan = 4
+        d = tiling.distance("sq_3_3", "sq_5_5")
+        assert d == 4
+
+    def test_hex_distance(self):
+        """Hex grid distance uses hex metric."""
+        tiling = HexTiling()
+        tiling.generate_graph(10, 10, seed=0)
+
+        # Adjacent cells are distance 1
+        for cell_id, cell in list(tiling.cells.items())[:10]:  # Test first 10 cells
+            for neighbor_id in cell.neighbors.values():
+                assert tiling.distance(cell_id, neighbor_id) == 1
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_distance_zero_to_self(self, tiling_class):
+        """Distance from cell to itself is 0."""
+        tiling = tiling_class()
+        tiling.generate_graph(5, 5, seed=0)
+
+        for cell_id in list(tiling.cells.keys())[:10]:  # Test first 10 cells
+            assert tiling.distance(cell_id, cell_id) == 0
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_distance_symmetry(self, tiling_class):
+        """Distance is symmetric."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(5, 5, seed=0)
+
+        cell_ids = list(cells.keys())[:10]  # Sample 10 cells
+        for i, id1 in enumerate(cell_ids):
+            for id2 in cell_ids[i+1:]:
+                assert tiling.distance(id1, id2) == tiling.distance(id2, id1)
diff --git a/src/v1_1/tests/test_edge_cases.py b/src/v1_1/tests/test_edge_cases.py
new file mode 100644
index 00000000..6e74dbd9
--- /dev/null
+++ b/src/v1_1/tests/test_edge_cases.py
@@ -0,0 +1,497 @@
+# test_edge_cases.py
+
+import pytest
+import sys
+import os
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from multigrid.env import MultiGridEnv, Action
+from multigrid.tilings import SquareTiling, HexTiling, TriangleTiling
+
+
+def create_simple_task(grid_size=10, agent_pos=(0.5, 0.5), max_steps=100):
+    """Helper to create a simple task spec."""
+    return {
+        "task_id": "test_task",
+        "seed": 42,
+        "scene": {
+            "bounds": {"width": 1.0, "height": 1.0},
+            "objects": [
+                {
+                    "id": "cube_red",
+                    "type": "movable",
+                    "color": "red",
+                    "position": {"x": 0.5, "y": 0.5},
+                    "size": 0.1
+                }
+            ],
+            "agent": {
+                "position": {"x": agent_pos[0], "y": agent_pos[1]},
+                "facing": 0
+            }
+        },
+        "goal": {
+            "predicate": "reach_position",
+            "position": {"x": 0.9, "y": 0.9}
+        },
+        "limits": {"max_steps": max_steps},
+        "tiling": {"type": "square", "grid_size": {"width": grid_size, "height": grid_size}}
+    }
+
+
+class TestEdgeCases:
+    """Tests for edge cases and boundary conditions."""
+
+    def test_agent_at_corner(self):
+        """Agent at corner has limited movement options."""
+        task = create_simple_task(agent_pos=(0.01, 0.01))
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Corner cell should have exactly 2 neighbors (east and south)
+        cell_id = env.state.agent.cell_id
+        neighbors = env.tiling.cells[cell_id].neighbors
+        assert len(neighbors) == 2, f"Corner cell should have 2 neighbors, got {len(neighbors)}"
+
+    def test_agent_at_edge(self):
+        """Agent at edge has 3 movement options."""
+        task = create_simple_task(agent_pos=(0.5, 0.01))
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Edge cell (but not corner) should have 3 neighbors
+        cell_id = env.state.agent.cell_id
+        neighbors = env.tiling.cells[cell_id].neighbors
+        assert len(neighbors) == 3, f"Edge cell should have 3 neighbors, got {len(neighbors)}"
+
+    def test_seed_zero(self):
+        """Seed 0 is valid and produces deterministic results."""
+        task = create_simple_task()
+
+        env1 = MultiGridEnv(task, tiling="square")
+        env2 = MultiGridEnv(task, tiling="square")
+
+        obs1, info1 = env1.reset(seed=0)
+        obs2, info2 = env2.reset(seed=0)
+
+        # Observations should be identical
+        assert obs1.shape == obs2.shape
+        assert (obs1 == obs2).all(), "Same seed should produce identical observations"
+
+        # States should be identical
+        assert env1.state.agent.cell_id == env2.state.agent.cell_id
+        assert env1.state.agent.facing == env2.state.agent.facing
+
+    def test_max_steps_truncation(self):
+        """Episode truncates at max_steps."""
+        task = create_simple_task(max_steps=5)
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        truncated = False
+        for i in range(6):
+            obs, reward, terminated, truncated, info = env.step(Action.WAIT)
+            # Truncation happens ON the max_steps'th step (steps are 1-indexed in execution)
+            if i < 4:
+                assert not truncated, f"Should not truncate before max_steps (step {i+1})"
+            elif i == 4:
+                assert truncated, f"Should truncate at max_steps (step {i+1})"
+                assert not terminated, "Should not be terminated (goal not reached)"
+                break
+
+    @pytest.mark.parametrize("tiling_type", ["square", "hex", "triangle"])
+    def test_deterministic_reset_all_tilings(self, tiling_type):
+        """All tilings produce deterministic results with same seed."""
+        task = create_simple_task()
+        task["tiling"]["type"] = tiling_type
+
+        env1 = MultiGridEnv(task, tiling=tiling_type)
+        env2 = MultiGridEnv(task, tiling=tiling_type)
+
+        obs1, _ = env1.reset(seed=123)
+        obs2, _ = env2.reset(seed=123)
+
+        assert obs1.shape == obs2.shape
+        assert (obs1 == obs2).all(), f"{tiling_type} tiling should be deterministic"
+
+    def test_action_after_truncation(self):
+        """Steps after truncation continue but episode is done."""
+        task = create_simple_task(max_steps=2)
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Take steps until truncation
+        for _ in range(2):
+            obs, reward, terminated, truncated, info = env.step(Action.WAIT)
+
+        assert truncated, "Episode should be truncated"
+
+        # Gymnasium allows steps after done, but they should maintain done status
+        # This is standard gymnasium behavior - environment doesn't prevent stepping after done
+        obs, reward, terminated, truncated, info = env.step(Action.WAIT)
+        # No exception - this is expected gymnasium behavior
+
+
+    def test_push_at_boundary(self):
+        """Pushing object at grid boundary fails (destination off-grid)."""
+        # Place movable object at east edge, agent behind it facing east
+        task = create_simple_task(grid_size=8)
+        # Object at right edge
+        task["scene"]["objects"][0]["position"] = {"x": 0.95, "y": 0.5}
+        # Agent one cell to the left of object
+        task["scene"]["agent"]["position"] = {"x": 0.80, "y": 0.5}
+
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Place agent facing east (toward the boundary object)
+        env.state.agent.facing = 1  # East
+
+        # Find the object and ensure agent is adjacent
+        obj = list(env.state.objects.values())[0]
+        obj_cell = obj.cell_id
+
+        # Move agent to the cell west of the object
+        west_of_obj = env.tiling.get_neighbor(obj_cell, "west")
+        assert west_of_obj is not None, "Object should not be at west edge"
+        env.state.agent.cell_id = west_of_obj
+        env.state.agent.facing = 1  # East
+
+        # Push should fail because destination (east of object) is off-grid or blocked
+        obs, reward, terminated, truncated, info = env.step(Action.PUSH)
+        assert info["invalid_action"] is True, "Push at boundary should be invalid"
+
+
+class TestBoundaryMovement:
+    """Tests for movement at grid boundaries."""
+
+    def test_cannot_move_off_north_edge(self):
+        """Cannot move north from top edge."""
+        task = create_simple_task(agent_pos=(0.5, 0.05))
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Set agent facing north
+        env.state.agent.facing = 0  # North
+
+        initial_cell = env.state.agent.cell_id
+        obs, reward, terminated, truncated, info = env.step(Action.FORWARD)
+
+        # Agent should stay in place at boundary
+        assert env.state.agent.cell_id == initial_cell
+        assert info.get("invalid_action") or info.get("boundary_collision")
+
+    def test_cannot_move_off_east_edge(self):
+        """Cannot move east from right edge."""
+        task = create_simple_task(agent_pos=(0.95, 0.5))
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Set agent facing east
+        env.state.agent.facing = 1  # East
+
+        initial_cell = env.state.agent.cell_id
+        obs, reward, terminated, truncated, info = env.step(Action.FORWARD)
+
+        # Agent should stay in place at boundary
+        assert env.state.agent.cell_id == initial_cell
+        assert info.get("invalid_action") or info.get("boundary_collision")
+
+    @pytest.mark.parametrize("tiling_type", ["square", "hex", "triangle"])
+    def test_all_boundary_directions(self, tiling_type):
+        """Test boundary behavior for all directions in each tiling."""
+        task = create_simple_task()
+        task["tiling"]["type"] = tiling_type
+
+        env = MultiGridEnv(task, tiling=tiling_type)
+        env.reset()
+
+        # Get a corner cell
+        corner_cells = [cid for cid, cell in env.tiling.cells.items()
+                        if len(cell.neighbors) == 2]
+        assert len(corner_cells) > 0, f"Should have corner cells in {tiling_type} grid"
+
+        # Move agent to corner
+        env.state.agent.cell_id = corner_cells[0]
+
+        # Try all possible facing directions
+        num_directions = len(env.tiling.directions)
+        for facing in range(num_directions):
+            env.state.agent.facing = facing
+            initial_cell = env.state.agent.cell_id
+
+            obs, reward, terminated, truncated, info = env.step(Action.FORWARD)
+
+            # Either agent moved to valid neighbor or stayed put
+            if env.state.agent.cell_id != initial_cell:
+                # Moved to valid neighbor
+                facing_dir = env.tiling.directions[facing]
+                assert facing_dir in env.tiling.cells[initial_cell].neighbors
+            else:
+                # Boundary collision - should be indicated in info
+                assert info.get("invalid_action") or info.get("boundary_collision"), \
+                    f"Boundary collision should be indicated for {tiling_type}"
+
+
+class TestObjectInteractions:
+    """Tests for object interaction edge cases."""
+
+    def _create_task_with_two_movables(self):
+        """Helper: task with two movable objects next to agent."""
+        return {
+            "task_id": "test_obj_interact",
+            "seed": 42,
+            "scene": {
+                "bounds": {"width": 1.0, "height": 1.0},
+                "objects": [
+                    {
+                        "id": "obj_a",
+                        "type": "movable",
+                        "color": "red",
+                        "position": {"x": 0.5, "y": 0.3},
+                        "size": 0.1,
+                    },
+                    {
+                        "id": "obj_b",
+                        "type": "movable",
+                        "color": "blue",
+                        "position": {"x": 0.5, "y": 0.7},
+                        "size": 0.1,
+                    },
+                ],
+                "agent": {"position": {"x": 0.5, "y": 0.5}, "facing": 0},
+            },
+            "goal": {"predicate": "reach_position", "position": {"x": 0.9, "y": 0.9}},
+            "limits": {"max_steps": 50},
+            "tiling": {"type": "square", "grid_size": {"width": 10, "height": 10}},
+        }
+
+    def test_pickup_while_holding(self):
+        """Picking up a second object while already holding one is invalid."""
+        task = self._create_task_with_two_movables()
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Face north toward obj_a and pick it up
+        env.state.agent.facing = 0  # North
+        obj_a = env.state.objects["obj_a"]
+
+        # Place agent directly south of obj_a
+        south_of_a = env.tiling.get_neighbor(obj_a.cell_id, "south")
+        if south_of_a:
+            env.state.agent.cell_id = south_of_a
+        env.state.agent.facing = 0  # North
+
+        obs, reward, terminated, truncated, info = env.step(Action.PICKUP)
+        assert env.state.agent.holding is not None, "Should have picked up obj_a"
+
+        # Now try to pick up obj_b — should fail
+        obj_b = env.state.objects["obj_b"]
+        south_of_b = env.tiling.get_neighbor(obj_b.cell_id, "south")
+        if south_of_b:
+            env.state.agent.cell_id = south_of_b
+        env.state.agent.facing = 0  # North
+
+        obs, reward, terminated, truncated, info = env.step(Action.PICKUP)
+        assert info["invalid_action"] is True, "Pickup while holding should be invalid"
+
+    def test_drop_with_nothing(self):
+        """Dropping when not holding anything is invalid."""
+        task = create_simple_task()
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Agent starts empty-handed
+        assert env.state.agent.holding is None
+
+        obs, reward, terminated, truncated, info = env.step(Action.DROP)
+        assert info["invalid_action"] is True, "Drop with nothing should be invalid"
+
+    def test_push_nothing(self):
+        """Pushing when facing an empty cell is invalid."""
+        task = create_simple_task(grid_size=10, agent_pos=(0.5, 0.5))
+        # Remove all objects so agent faces empty cells
+        task["scene"]["objects"] = []
+
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        env.state.agent.facing = 1  # East
+
+        obs, reward, terminated, truncated, info = env.step(Action.PUSH)
+        assert info["invalid_action"] is True, "Push nothing should be invalid"
+
+    def test_push_chain(self):
+        """Pushing object into another object (chain) is invalid."""
+        task = {
+            "task_id": "test_push_chain",
+            "seed": 42,
+            "scene": {
+                "bounds": {"width": 1.0, "height": 1.0},
+                "objects": [
+                    {
+                        "id": "block_near",
+                        "type": "movable",
+                        "color": "red",
+                        "position": {"x": 0.5, "y": 0.5},
+                        "size": 0.1,
+                    },
+                    {
+                        "id": "block_far",
+                        "type": "movable",
+                        "color": "blue",
+                        "position": {"x": 0.5, "y": 0.3},
+                        "size": 0.1,
+                    },
+                ],
+                "agent": {"position": {"x": 0.5, "y": 0.7}, "facing": 0},
+            },
+            "goal": {"predicate": "reach_position", "position": {"x": 0.9, "y": 0.9}},
+            "limits": {"max_steps": 50},
+            "tiling": {"type": "square", "grid_size": {"width": 10, "height": 10}},
+        }
+
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # Arrange: agent south of block_near, block_far north of block_near
+        block_near = env.state.objects["block_near"]
+        block_far = env.state.objects["block_far"]
+
+        # Ensure they're in a north-south line
+        north_of_near = env.tiling.get_neighbor(block_near.cell_id, "north")
+        south_of_near = env.tiling.get_neighbor(block_near.cell_id, "south")
+
+        # Place block_far directly north of block_near
+        block_far.cell_id = north_of_near
+        # Place agent directly south of block_near
+        env.state.agent.cell_id = south_of_near
+        env.state.agent.facing = 0  # North
+
+        obs, reward, terminated, truncated, info = env.step(Action.PUSH)
+        assert info["invalid_action"] is True, "Push chain should be invalid (destination blocked)"
+
+
+class TestZones:
+    """Tests for zone functionality (covered_cells and ObjectInZoneGoal)."""
+
+    def test_zone_at_boundary(self):
+        """Zone at grid corner: all covered cells must be valid."""
+        task = {
+            "task_id": "test_zone_boundary",
+            "seed": 42,
+            "scene": {
+                "bounds": {"width": 1.0, "height": 1.0},
+                "objects": [
+                    {
+                        "id": "zone_corner",
+                        "type": "zone",
+                        "color": "blue",
+                        "position": {"x": 0.01, "y": 0.01},
+                        "radius_hops": 2,
+                    }
+                ],
+                "agent": {"position": {"x": 0.5, "y": 0.5}, "facing": 0},
+            },
+            "goal": {"predicate": "reach_position", "position": {"x": 0.9, "y": 0.9}},
+            "limits": {"max_steps": 50},
+            "tiling": {"type": "square", "grid_size": {"width": 8, "height": 8}},
+        }
+
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        zone = env.state.objects["zone_corner"]
+        assert len(zone.covered_cells) > 0, "Zone should have covered cells"
+
+        # All covered cells must exist in the tiling
+        for cell_id in zone.covered_cells:
+            assert cell_id in env.tiling.cells, f"Covered cell {cell_id} not in tiling"
+
+        # At a corner with radius 2, should have fewer cells than a center zone
+        # (boundary limits expansion)
+        assert len(zone.covered_cells) < (2 * 2 + 1) ** 2, \
+            "Corner zone should have fewer cells than an unbounded zone"
+
+    def test_zone_radius_zero(self):
+        """Zone with radius_hops=0 covers exactly one cell (the center)."""
+        task = {
+            "task_id": "test_zone_r0",
+            "seed": 42,
+            "scene": {
+                "bounds": {"width": 1.0, "height": 1.0},
+                "objects": [
+                    {
+                        "id": "zone_single",
+                        "type": "zone",
+                        "color": "green",
+                        "position": {"x": 0.5, "y": 0.5},
+                        "radius_hops": 0,
+                    }
+                ],
+                "agent": {"position": {"x": 0.2, "y": 0.2}, "facing": 0},
+            },
+            "goal": {"predicate": "reach_position", "position": {"x": 0.9, "y": 0.9}},
+            "limits": {"max_steps": 50},
+            "tiling": {"type": "square", "grid_size": {"width": 8, "height": 8}},
+        }
+
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        zone = env.state.objects["zone_single"]
+        assert len(zone.covered_cells) == 1, \
+            f"Radius-0 zone should cover exactly 1 cell, got {len(zone.covered_cells)}"
+        assert zone.cell_id in zone.covered_cells, \
+            "Radius-0 zone's covered cell should be its own cell"
+
+    def test_consecutive_steps_in_zone(self):
+        """ObjectInZoneGoal with consecutive_steps=3 requires 3 checks in a row."""
+        from multigrid.goals import ObjectInZoneGoal
+
+        task = {
+            "task_id": "test_consec_zone",
+            "seed": 42,
+            "scene": {
+                "bounds": {"width": 1.0, "height": 1.0},
+                "objects": [
+                    {
+                        "id": "zone_target",
+                        "type": "zone",
+                        "color": "blue",
+                        "position": {"x": 0.5, "y": 0.5},
+                        "radius_hops": 2,
+                    },
+                    {
+                        "id": "cube_red",
+                        "type": "movable",
+                        "color": "red",
+                        "position": {"x": 0.5, "y": 0.5},
+                        "size": 0.1,
+                    },
+                ],
+                "agent": {"position": {"x": 0.2, "y": 0.2}, "facing": 0},
+            },
+            "goal": {
+                "type": "object_in_zone",
+                "object_id": "cube_red",
+                "zone_id": "zone_target",
+                "consecutive_steps": 3,
+            },
+            "limits": {"max_steps": 50},
+            "tiling": {"type": "square", "grid_size": {"width": 8, "height": 8}},
+        }
+
+        env = MultiGridEnv(task, tiling="square")
+        env.reset()
+
+        # The cube starts in the zone. Step WAIT 3 times — goal should trigger
+        # on the 3rd step (consecutive_steps=3).
+        for i in range(2):
+            obs, reward, terminated, truncated, info = env.step(Action.WAIT)
+            assert not terminated, f"Goal should not be achieved on step {i+1}"
+
+        obs, reward, terminated, truncated, info = env.step(Action.WAIT)
+        assert terminated, "Goal should be achieved after 3 consecutive steps in zone"
diff --git a/src/v1_1/tests/test_exotic_tilings.py b/src/v1_1/tests/test_exotic_tilings.py
new file mode 100644
index 00000000..0a64d9da
--- /dev/null
+++ b/src/v1_1/tests/test_exotic_tilings.py
@@ -0,0 +1,535 @@
+# test_exotic_tilings.py
+
+"""
+Tests for Archimedean tilings: 3-4-6-4 (Rhombitrihexagonal) and 4-8-8 (Truncated Square).
+"""
+
+import pytest
+import sys
+import os
+import numpy as np
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from multigrid.tilings.archimedean_3464 import Archimedean3464Tiling
+from multigrid.tilings.archimedean_488 import Archimedean488Tiling
+from multigrid.env import TilingRegistry
+
+
+class TestArchimedean3464CellCount:
+    """Tests for 3-4-6-4 tiling cell counts.
+
+    The tiling is built by placing hexagons on a lattice and generating
+    surrounding squares (6 per hex) and triangles (6 per hex), then
+    deduplicating shared tiles. Each hex has exactly width*height hexagons.
+    Squares are shared between 2 hexagons and triangles between 3, so
+    the total depends on boundary effects. For a 1x1 grid: 1+6+6=13.
+    """
+
+    @pytest.mark.parametrize("width,height,expected_hexes", [
+        (1, 1, 1),
+        (2, 2, 4),
+        (3, 3, 9),
+        (2, 4, 8),
+        (4, 2, 8),
+    ])
+    def test_hex_count(self, width, height, expected_hexes):
+        """Number of hexagons equals width * height."""
+        tiling = Archimedean3464Tiling()
+        cells = tiling.generate_graph(width, height, seed=42)
+        hex_count = sum(
+            1 for c in cells.values()
+            if c.tiling_coords["tile_type"] == "hexagon"
+        )
+        assert hex_count == expected_hexes, (
+            f"Expected {expected_hexes} hexagons for {width}x{height} grid, "
+            f"got {hex_count}"
+        )
+
+    @pytest.mark.parametrize("width,height", [
+        (1, 1),
+        (2, 2),
+        (3, 3),
+        (2, 4),
+        (4, 2),
+    ])
+    def test_total_cell_count_positive(self, width, height):
+        """Total cell count is greater than number of hexagons."""
+        tiling = Archimedean3464Tiling()
+        cells = tiling.generate_graph(width, height, seed=42)
+        n_hex = width * height
+        assert len(cells) > n_hex, (
+            f"Total cells ({len(cells)}) should exceed hex count ({n_hex})"
+        )
+
+
+class TestArchimedean488CellCount:
+    """Tests for 4-8-8 tiling cell counts."""
+
+    @pytest.mark.parametrize("width,height", [
+        (2, 2),
+        (3, 3),
+        (4, 4),
+        (3, 5),
+        (5, 3),
+    ])
+    def test_cell_count(self, width, height):
+        """Cell count equals width * height (one tile per grid position)."""
+        tiling = Archimedean488Tiling()
+        cells = tiling.generate_graph(width, height, seed=42)
+        expected = width * height
+        assert len(cells) == expected, (
+            f"Expected {expected} cells for {width}x{height} grid, got {len(cells)}"
+        )
+
+
+class TestAdjacencySymmetry:
+    """If A neighbors B, then B must neighbor A."""
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_adjacency_symmetry(self, tiling_class):
+        """Adjacency relation is symmetric."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=3, height=3, seed=0)
+
+        for cell_id, cell in cells.items():
+            for direction, neighbor_id in cell.neighbors.items():
+                assert neighbor_id in cells, (
+                    f"Neighbor {neighbor_id} of {cell_id} not in cells"
+                )
+                neighbor = cells[neighbor_id]
+                assert cell_id in neighbor.neighbors.values(), (
+                    f"Asymmetric adjacency: {cell_id} -> {neighbor_id} "
+                    f"via {direction}, but {neighbor_id} does not neighbor "
+                    f"{cell_id}. {neighbor_id} neighbors: {neighbor.neighbors}"
+                )
+
+
+class TestVariableNeighborCounts:
+    """Tiles have the correct number of neighbors based on their polygon type."""
+
+    def test_3464_neighbor_counts(self):
+        """3-4-6-4: triangles have <=3, squares <=4, hexagons <=6 neighbors."""
+        tiling = Archimedean3464Tiling()
+        # Use larger grid so interior cells have full neighbor sets
+        cells = tiling.generate_graph(width=4, height=4, seed=0)
+
+        for cell_id, cell in cells.items():
+            tc = cell.tiling_coords
+            tile_type = tc["tile_type"]
+            n_neighbors = len(cell.neighbors)
+
+            if tile_type == "triangle":
+                assert n_neighbors <= 3, (
+                    f"Triangle {cell_id} has {n_neighbors} neighbors (max 3)"
+                )
+            elif tile_type == "square":
+                assert n_neighbors <= 4, (
+                    f"Square {cell_id} has {n_neighbors} neighbors (max 4)"
+                )
+            elif tile_type == "hexagon":
+                assert n_neighbors <= 6, (
+                    f"Hexagon {cell_id} has {n_neighbors} neighbors (max 6)"
+                )
+
+    def test_3464_has_all_tile_types(self):
+        """3-4-6-4 tiling contains triangles, squares, and hexagons."""
+        tiling = Archimedean3464Tiling()
+        cells = tiling.generate_graph(width=2, height=2, seed=0)
+
+        tile_types = set()
+        for cell in cells.values():
+            tile_types.add(cell.tiling_coords["tile_type"])
+
+        assert "triangle" in tile_types, "Missing triangles in 3-4-6-4 tiling"
+        assert "square" in tile_types, "Missing squares in 3-4-6-4 tiling"
+        assert "hexagon" in tile_types, "Missing hexagons in 3-4-6-4 tiling"
+
+    def test_488_neighbor_counts(self):
+        """4-8-8: squares have <=4, octagons have <=8 neighbors."""
+        tiling = Archimedean488Tiling()
+        cells = tiling.generate_graph(width=5, height=5, seed=0)
+
+        for cell_id, cell in cells.items():
+            tc = cell.tiling_coords
+            tile_type = tc["tile_type"]
+            n_neighbors = len(cell.neighbors)
+
+            if tile_type == "square":
+                assert n_neighbors <= 4, (
+                    f"Square {cell_id} has {n_neighbors} neighbors (max 4)"
+                )
+            elif tile_type == "octagon":
+                assert n_neighbors <= 8, (
+                    f"Octagon {cell_id} has {n_neighbors} neighbors (max 8)"
+                )
+
+    def test_488_has_both_tile_types(self):
+        """4-8-8 tiling contains both squares and octagons."""
+        tiling = Archimedean488Tiling()
+        cells = tiling.generate_graph(width=3, height=3, seed=0)
+
+        tile_types = set()
+        for cell in cells.values():
+            tile_types.add(cell.tiling_coords["tile_type"])
+
+        assert "square" in tile_types, "Missing squares in 4-8-8 tiling"
+        assert "octagon" in tile_types, "Missing octagons in 4-8-8 tiling"
+
+    def test_488_interior_octagons_have_8_neighbors(self):
+        """Interior octagons in a large-enough grid should have 8 neighbors."""
+        tiling = Archimedean488Tiling()
+        cells = tiling.generate_graph(width=7, height=7, seed=0)
+
+        # Check interior cells (not on boundary rows/cols)
+        found_full_octagon = False
+        for cell_id, cell in cells.items():
+            tc = cell.tiling_coords
+            if tc["tile_type"] == "octagon":
+                row, col = cell.row, cell.col
+                if 1 <= row <= 5 and 1 <= col <= 5:
+                    n = len(cell.neighbors)
+                    if n == 8:
+                        found_full_octagon = True
+
+        assert found_full_octagon, (
+            "No interior octagon found with full 8 neighbors in 7x7 grid"
+        )
+
+    def test_488_interior_squares_have_4_neighbors(self):
+        """Interior squares should have 4 neighbors."""
+        tiling = Archimedean488Tiling()
+        cells = tiling.generate_graph(width=7, height=7, seed=0)
+
+        found_full_square = False
+        for cell_id, cell in cells.items():
+            tc = cell.tiling_coords
+            if tc["tile_type"] == "square":
+                row, col = cell.row, cell.col
+                if 1 <= row <= 5 and 1 <= col <= 5:
+                    n = len(cell.neighbors)
+                    if n == 4:
+                        found_full_square = True
+
+        assert found_full_square, (
+            "No interior square found with full 4 neighbors in 7x7 grid"
+        )
+
+
+class TestCanonicalCoordinates:
+    """All canonical coordinates should be in [0,1]."""
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_canonical_in_unit_interval(self, tiling_class):
+        """All cell positions (position_hint) are in [0,1]."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=4, height=4, seed=42)
+
+        for cell_id, cell in cells.items():
+            x, y = cell.position_hint
+            assert 0.0 <= x <= 1.0, (
+                f"Cell {cell_id} x={x} out of [0,1]"
+            )
+            assert 0.0 <= y <= 1.0, (
+                f"Cell {cell_id} y={y} out of [0,1]"
+            )
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_cell_to_canonical_matches_hint(self, tiling_class):
+        """cell_to_canonical returns the same as position_hint."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=3, height=3, seed=0)
+
+        for cell_id, cell in cells.items():
+            pos = tiling.cell_to_canonical(cell_id)
+            assert abs(pos[0] - cell.position_hint[0]) < 1e-10
+            assert abs(pos[1] - cell.position_hint[1]) < 1e-10
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_canonical_to_cell_roundtrip(self, tiling_class):
+        """canonical_to_cell(cell_to_canonical(id)) should return the same cell."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=3, height=3, seed=0)
+
+        for cell_id in cells:
+            x, y = tiling.cell_to_canonical(cell_id)
+            recovered = tiling.canonical_to_cell(x, y)
+            assert recovered == cell_id, (
+                f"Roundtrip failed for {cell_id}: "
+                f"({x:.4f}, {y:.4f}) -> {recovered}"
+            )
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_all_vertices_in_unit_interval(self, tiling_class):
+        """All polygon vertices should be in [0,1]."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=3, height=3, seed=0)
+
+        for cell_id, cell in cells.items():
+            tc = cell.tiling_coords
+            for vx, vy in tc["vertices"]:
+                assert -0.01 <= vx <= 1.01, (
+                    f"Cell {cell_id} vertex x={vx} out of range"
+                )
+                assert -0.01 <= vy <= 1.01, (
+                    f"Cell {cell_id} vertex y={vy} out of range"
+                )
+
+
+class TestRendering:
+    """Test that rendering produces valid, non-zero RGB arrays."""
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_rendering_produces_nonzero_image(self, tiling_class):
+        """Rendering should produce a non-zero RGB array."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=3, height=3, seed=0)
+
+        # Import rendering
+        from multigrid.rendering import render_multigrid, MinimalRenderer
+
+        # We need a minimal WorldState-like object for rendering
+        # Create a simple stub
+        class StubAgent:
+            cell_id = list(cells.keys())[0]
+            facing = 0
+            holding = None
+
+        class StubState:
+            agent = StubAgent()
+            objects = {}
+            goal = None
+
+        frame = render_multigrid(StubState(), tiling, width=256, height=256)
+        assert isinstance(frame, np.ndarray)
+        assert frame.shape == (256, 256, 3)
+        assert frame.dtype == np.uint8
+        # Should not be all-black (background is light gray)
+        assert frame.sum() > 0, "Rendered frame is all black"
+        # Should have some variation (not a solid color)
+        assert frame.std() > 0, "Rendered frame has no variation"
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_rendering_different_sizes(self, tiling_class):
+        """Rendering at different resolutions should all produce valid images."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=2, height=2, seed=0)
+
+        from multigrid.rendering import render_multigrid
+
+        class StubAgent:
+            cell_id = list(cells.keys())[0]
+            facing = 0
+            holding = None
+
+        class StubState:
+            agent = StubAgent()
+            objects = {}
+            goal = None
+
+        for size in [64, 128, 512]:
+            frame = render_multigrid(StubState(), tiling, width=size, height=size)
+            assert frame.shape == (size, size, 3)
+            assert frame.sum() > 0
+
+
+class TestSeedDeterminism:
+    """Same seed should produce identical graphs."""
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_seed_determinism(self, tiling_class):
+        """Same seed produces identical graph."""
+        tiling1 = tiling_class()
+        tiling2 = tiling_class()
+
+        cells1 = tiling1.generate_graph(3, 3, seed=12345)
+        cells2 = tiling2.generate_graph(3, 3, seed=12345)
+
+        assert set(cells1.keys()) == set(cells2.keys()), (
+            "Cell ID sets differ between identical seeds"
+        )
+
+        for cell_id in cells1:
+            assert cells1[cell_id].neighbors == cells2[cell_id].neighbors, (
+                f"Neighbors differ for {cell_id}"
+            )
+            pos1 = cells1[cell_id].position_hint
+            pos2 = cells2[cell_id].position_hint
+            assert abs(pos1[0] - pos2[0]) < 1e-12
+            assert abs(pos1[1] - pos2[1]) < 1e-12
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_different_seeds_same_result(self, tiling_class):
+        """Since these tilings are deterministic, different seeds should
+        still produce the same graph (seed is unused)."""
+        tiling1 = tiling_class()
+        tiling2 = tiling_class()
+
+        cells1 = tiling1.generate_graph(3, 3, seed=0)
+        cells2 = tiling2.generate_graph(3, 3, seed=99999)
+
+        assert set(cells1.keys()) == set(cells2.keys())
+        for cell_id in cells1:
+            assert cells1[cell_id].neighbors == cells2[cell_id].neighbors
+
+
+class TestDistance:
+    """Graph distance (BFS) computation tests."""
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_distance_self_is_zero(self, tiling_class):
+        """Distance from a cell to itself is 0."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(3, 3, seed=0)
+
+        for cell_id in list(cells.keys())[:5]:
+            assert tiling.distance(cell_id, cell_id) == 0
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_distance_neighbors_is_one(self, tiling_class):
+        """Distance between direct neighbors is 1."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(3, 3, seed=0)
+
+        for cell_id, cell in list(cells.items())[:5]:
+            for neighbor_id in cell.neighbors.values():
+                assert tiling.distance(cell_id, neighbor_id) == 1
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_distance_symmetry(self, tiling_class):
+        """Distance(A, B) == Distance(B, A)."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(3, 3, seed=0)
+
+        cell_ids = list(cells.keys())
+        for i in range(min(5, len(cell_ids))):
+            for j in range(i + 1, min(5, len(cell_ids))):
+                d1 = tiling.distance(cell_ids[i], cell_ids[j])
+                d2 = tiling.distance(cell_ids[j], cell_ids[i])
+                assert d1 == d2, (
+                    f"Asymmetric distance: {cell_ids[i]}<->{cell_ids[j]}: "
+                    f"{d1} vs {d2}"
+                )
+
+
+class TestGetNeighborBeyondEdgeCount:
+    """get_neighbor returns None for directions beyond cell's edge count."""
+
+    def test_3464_triangle_extra_directions(self):
+        """Triangles in 3-4-6-4 should return None for edge_3..edge_5."""
+        tiling = Archimedean3464Tiling()
+        cells = tiling.generate_graph(3, 3, seed=0)
+
+        for cell_id, cell in cells.items():
+            tc = cell.tiling_coords
+            if tc["tile_type"] == "triangle":
+                n_sides = tc["n_sides"]
+                # Directions beyond actual edge count should be None
+                for i in range(n_sides, 6):
+                    result = tiling.get_neighbor(cell_id, f"edge_{i}")
+                    assert result is None, (
+                        f"Triangle {cell_id} edge_{i} should be None, got {result}"
+                    )
+                break  # Only need to test one triangle
+
+    def test_488_square_extra_directions(self):
+        """Squares in 4-8-8 should return None for edge_4..edge_7."""
+        tiling = Archimedean488Tiling()
+        cells = tiling.generate_graph(4, 4, seed=0)
+
+        for cell_id, cell in cells.items():
+            tc = cell.tiling_coords
+            if tc["tile_type"] == "square":
+                n_sides = tc["n_sides"]
+                for i in range(n_sides, 8):
+                    result = tiling.get_neighbor(cell_id, f"edge_{i}")
+                    assert result is None, (
+                        f"Square {cell_id} edge_{i} should be None, got {result}"
+                    )
+                break  # Only need to test one square
+
+
+class TestTilingRegistry:
+    """Test that new tilings are registered properly."""
+
+    def test_3464_registered(self):
+        """3-4-6-4 tiling can be obtained from registry."""
+        tiling = TilingRegistry.get("3464")
+        assert tiling.name == "3464"
+        assert isinstance(tiling, Archimedean3464Tiling)
+
+    def test_488_registered(self):
+        """4-8-8 tiling can be obtained from registry."""
+        tiling = TilingRegistry.get("488")
+        assert tiling.name == "488"
+        assert isinstance(tiling, Archimedean488Tiling)
+
+
+class TestConnectivity:
+    """Test that the tilings produce connected graphs."""
+
+    @pytest.mark.parametrize("tiling_class", [
+        Archimedean3464Tiling,
+        Archimedean488Tiling,
+    ])
+    def test_graph_is_connected(self, tiling_class):
+        """All cells should be reachable from any starting cell (connected graph)."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(3, 3, seed=0)
+
+        if len(cells) == 0:
+            return
+
+        # BFS from first cell
+        start = next(iter(cells))
+        visited = {start}
+        from collections import deque
+        queue = deque([start])
+
+        while queue:
+            current = queue.popleft()
+            for neighbor_id in cells[current].neighbors.values():
+                if neighbor_id not in visited:
+                    visited.add(neighbor_id)
+                    queue.append(neighbor_id)
+
+        assert len(visited) == len(cells), (
+            f"Graph is not connected: visited {len(visited)} of {len(cells)} cells"
+        )
diff --git a/src/v1_1/tests/test_model_interface.py b/src/v1_1/tests/test_model_interface.py
new file mode 100644
index 00000000..629c21bd
--- /dev/null
+++ b/src/v1_1/tests/test_model_interface.py
@@ -0,0 +1,277 @@
+"""Tests for model interface, evaluation harness, and NL domain."""
+
+import pytest
+import sys
+import os
+import json
+import tempfile
+from pathlib import Path
+
+_v1_1_dir = str(Path(__file__).resolve().parent.parent)
+if _v1_1_dir not in sys.path:
+    sys.path.insert(0, _v1_1_dir)
+
+import numpy as np
+from model_interface import ModelInterface, ModelInput, ModelOutput, RandomModelInterface
+from evaluation_harness import EvaluationHarness, TierMetrics, EvaluationResult
+from gridworld.task_spec import TaskSpecification
+from gridworld.actions import ACTION_NAMES
+
+
+class TestModelInput:
+    def test_create_model_input(self):
+        inp = ModelInput(
+            image=np.zeros((64, 64, 3), dtype=np.uint8),
+            text_prompt="Navigate to the goal",
+            action_space=ACTION_NAMES,
+            step_number=1,
+            max_steps=100,
+        )
+        assert inp.image.shape == (64, 64, 3)
+        assert inp.step_number == 1
+
+    def test_optional_context(self):
+        inp = ModelInput(
+            image=np.zeros((64, 64, 3), dtype=np.uint8),
+            text_prompt="test",
+            action_space={0: "left"},
+            step_number=0,
+            max_steps=10,
+            additional_context="Extra info",
+        )
+        assert inp.additional_context == "Extra info"
+
+
+class TestRandomModel:
+    def test_random_model_name(self):
+        model = RandomModelInterface(seed=42)
+        assert model.model_name == "random"
+
+    def test_random_model_predict(self):
+        model = RandomModelInterface(seed=42)
+        inp = ModelInput(
+            image=np.zeros((64, 64, 3), dtype=np.uint8),
+            text_prompt="test",
+            action_space=ACTION_NAMES,
+            step_number=1,
+            max_steps=100,
+        )
+        output = model.predict(inp)
+        assert isinstance(output, ModelOutput)
+        assert output.action in ACTION_NAMES
+
+    def test_random_model_deterministic(self):
+        """Same seed should produce same sequence."""
+        model1 = RandomModelInterface(seed=123)
+        model2 = RandomModelInterface(seed=123)
+        inp = ModelInput(
+            image=np.zeros((64, 64, 3), dtype=np.uint8),
+            text_prompt="test",
+            action_space=ACTION_NAMES,
+            step_number=1,
+            max_steps=100,
+        )
+        actions1 = [model1.predict(inp).action for _ in range(10)]
+        actions2 = [model2.predict(inp).action for _ in range(10)]
+        assert actions1 == actions2
+
+    def test_random_model_batch(self):
+        model = RandomModelInterface(seed=42)
+        inp = ModelInput(
+            image=np.zeros((64, 64, 3), dtype=np.uint8),
+            text_prompt="test",
+            action_space=ACTION_NAMES,
+            step_number=1,
+            max_steps=100,
+        )
+        outputs = model.predict_batch([inp, inp, inp])
+        assert len(outputs) == 3
+        assert all(isinstance(o, ModelOutput) for o in outputs)
+
+
+class TestEvaluationHarness:
+    @pytest.fixture
+    def simple_spec(self):
+        return TaskSpecification.from_dict({
+            "task_id": "test_simple",
+            "seed": 42,
+            "difficulty_tier": 1,
+            "maze": {
+                "dimensions": [6, 6],
+                "walls": [],
+                "start": [1, 1],
+                "goal": [4, 4],
+            },
+            "goal": {"type": "reach_position", "target": [4, 4]},
+            "max_steps": 20,
+        })
+
+    def test_evaluate_single_task(self, simple_spec):
+        model = RandomModelInterface(seed=42)
+        harness = EvaluationHarness(model)
+        result = harness.evaluate_task(simple_spec)
+        assert result.task_id == "test_simple"
+        assert result.steps_taken > 0
+        assert result.steps_taken <= 20
+        harness.close()
+
+    def test_evaluate_tier(self):
+        model = RandomModelInterface(seed=42)
+        harness = EvaluationHarness(model)
+        task_dir = str(Path(__file__).resolve().parent.parent / "gridworld" / "tasks")
+        metrics = harness.evaluate_tier(tier=1, task_dir=task_dir)
+        assert isinstance(metrics, TierMetrics)
+        assert metrics.tier == 1
+        assert metrics.num_tasks == 3  # 3 tier1 tasks
+        assert 0.0 <= metrics.success_rate <= 1.0
+        harness.close()
+
+    def test_evaluate_all(self):
+        model = RandomModelInterface(seed=42)
+        harness = EvaluationHarness(model)
+        task_dir = str(Path(__file__).resolve().parent.parent / "gridworld" / "tasks")
+        result = harness.evaluate_all(task_dir=task_dir, tiers=[1])
+        assert isinstance(result, EvaluationResult)
+        assert result.model_name == "random"
+        assert 1 in result.tier_metrics
+        harness.close()
+
+    def test_result_serialization(self):
+        model = RandomModelInterface(seed=42)
+        harness = EvaluationHarness(model)
+        task_dir = str(Path(__file__).resolve().parent.parent / "gridworld" / "tasks")
+        result = harness.evaluate_all(task_dir=task_dir, tiers=[1])
+
+        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as f:
+            result.save(f.name)
+            with open(f.name) as fp:
+                data = json.load(fp)
+            assert "model_name" in data
+            assert "tier_metrics" in data
+            os.unlink(f.name)
+        harness.close()
+
+
+class TestNLActionParser:
+    @pytest.fixture
+    def parser(self):
+        from nl_domain.nl_action_parser import NLActionParser
+        return NLActionParser()
+
+    def test_forward_commands(self, parser):
+        for cmd in ["go forward", "move forward", "forward", "walk ahead", "advance"]:
+            actions = parser.parse(cmd)
+            assert actions == [2], f"'{cmd}' should parse to forward (2), got {actions}"
+
+    def test_turn_commands(self, parser):
+        assert parser.parse("turn left") == [0]
+        assert parser.parse("turn right") == [1]
+        assert parser.parse("rotate left") == [0]
+
+    def test_interaction_commands(self, parser):
+        assert parser.parse("pick up") == [3]
+        assert parser.parse("grab") == [3]
+        assert parser.parse("drop") == [4]
+        assert parser.parse("toggle") == [5]
+        assert parser.parse("open") == [5]
+        assert parser.parse("press") == [5]
+
+    def test_wait_commands(self, parser):
+        for cmd in ["wait", "stay", "do nothing", "done"]:
+            actions = parser.parse(cmd)
+            assert actions == [6], f"'{cmd}' should parse to done (6), got {actions}"
+
+    def test_compass_north(self, parser):
+        """Moving north when facing right should turn left then forward."""
+        # Agent facing right (0), need to face up (3)
+        # Right to up: turn left once (CCW: 0->3 is one left turn)
+        actions = parser.parse("move north", agent_facing=0)
+        assert actions[-1] == 2  # Last action should be forward
+        assert 0 in actions  # Should include turn_left
+
+    def test_compass_same_direction(self, parser):
+        """Moving north when already facing north should just go forward."""
+        actions = parser.parse("move north", agent_facing=3)
+        assert actions == [2]  # Just forward
+
+    def test_compound_commands(self, parser):
+        actions = parser.parse("turn left then go forward")
+        assert actions == [0, 2]
+
+    def test_empty_command(self, parser):
+        actions = parser.parse("")
+        assert actions == [6]  # Wait
+
+
+class TestNLGridWorldEnv:
+    def test_nl_env_basic(self):
+        from nl_domain.nl_env import NLGridWorldEnv
+        spec = TaskSpecification.from_dict({
+            "task_id": "test_nl",
+            "seed": 42,
+            "difficulty_tier": 1,
+            "maze": {
+                "dimensions": [6, 6],
+                "walls": [],
+                "start": [1, 1],
+                "goal": [4, 4],
+            },
+            "goal": {"type": "reach_position", "target": [4, 4]},
+            "max_steps": 20,
+        })
+
+        env = NLGridWorldEnv(spec)
+        obs, info = env.reset(seed=42)
+        assert obs is not None
+        assert "mission" in info
+
+        obs, reward, term, trunc, info = env.step("go forward")
+        assert obs is not None
+        assert "parsed_actions" in info
+        assert info["parsed_actions"] == [2]  # forward
+
+        env.close()
+
+
+class TestCrossDomain:
+    def test_canonical_roundtrip(self):
+        from cross_domain.canonical_task_spec import CanonicalTaskSpec, CanonicalGoal, CanonicalObject
+        from cross_domain.gridworld_adapter import GridWorldDomainAdapter
+
+        spec = TaskSpecification.from_dict({
+            "task_id": "test_roundtrip",
+            "seed": 42,
+            "difficulty_tier": 1,
+            "maze": {
+                "dimensions": [10, 10],
+                "walls": [[3, 3], [3, 4]],
+                "start": [1, 1],
+                "goal": [8, 8],
+            },
+            "mechanisms": {
+                "keys": [{"id": "k1", "position": [2, 2], "color": "yellow"}],
+            },
+            "goal": {"type": "reach_position", "target": [8, 8]},
+            "max_steps": 100,
+        })
+
+        adapter = GridWorldDomainAdapter()
+        canonical = adapter.to_canonical(spec)
+
+        assert canonical.task_id == "test_roundtrip"
+        assert canonical.difficulty == 1
+        assert 0.0 <= canonical.agent_start[0] <= 1.0
+        assert 0.0 <= canonical.agent_start[1] <= 1.0
+        assert canonical.goal.goal_type == "reach"
+        assert len(canonical.objects) > 0  # walls + key
+
+        # Find the key in canonical objects
+        key_objs = [o for o in canonical.objects if o.obj_type == "collectible"]
+        assert len(key_objs) == 1
+        assert key_objs[0].id == "k1"
+
+    def test_gui_action_dataclass(self):
+        from cross_domain.domain_adapter import GUIAction
+        action = GUIAction(action_type="mouse_click", x=0.5, y=0.3)
+        assert action.action_type == "mouse_click"
+        assert action.x == 0.5
diff --git a/src/v1_1/tests/test_multigrid_partial_obs.py b/src/v1_1/tests/test_multigrid_partial_obs.py
new file mode 100644
index 00000000..300ba32e
--- /dev/null
+++ b/src/v1_1/tests/test_multigrid_partial_obs.py
@@ -0,0 +1,300 @@
+"""Tests for MultiGrid partial observability (view cone and fog of war)."""
+
+import pytest
+import sys
+import os
+import math
+import numpy as np
+from pathlib import Path
+
+_v1_1_dir = str(Path(__file__).resolve().parent.parent)
+if _v1_1_dir not in sys.path:
+    sys.path.insert(0, _v1_1_dir)
+
+from multigrid.env import MultiGridEnv
+from multigrid.visibility import (
+    compute_visible_cells,
+    _facing_to_angle,
+    _is_in_view_cone,
+    _is_cell_blocking,
+)
+
+
+# --- Helpers ---
+
+def _make_spec(width=5, height=5, walls=None, objects=None, goal_x=0.9, goal_y=0.9,
+               agent_x=0.3, agent_y=0.3, agent_facing=0):
+    """Create a minimal MultiGrid task spec dict."""
+    spec = {
+        "task_id": "test_partial_obs",
+        "seed": 1,
+        "tiling": {
+            "type": "square",
+            "grid_size": {"width": width, "height": height},
+        },
+        "scene": {
+            "agent": {
+                "position": {"x": agent_x, "y": agent_y},
+                "facing": agent_facing,
+            },
+            "objects": objects or [],
+            "walls": walls or [],
+        },
+        "goal": {
+            "type": "reach_position",
+            "target": {"x": goal_x, "y": goal_y},
+        },
+        "limits": {"max_steps": 50},
+    }
+    return spec
+
+
+# --- Tests ---
+
+class TestFullObservability:
+    """Full observability: all cells should be visible."""
+
+    def test_all_cells_visible(self):
+        spec = _make_spec()
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           observability_mode="full")
+        obs, info = env.reset(seed=42)
+        assert env.state.visible_cells == set(env.tiling.cells.keys())
+        assert env.state.explored_cells == set(env.tiling.cells.keys())
+
+    def test_full_obs_no_visibility_info_in_info(self):
+        spec = _make_spec()
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           observability_mode="full")
+        obs, info = env.reset(seed=42)
+        assert "visible_cells" not in info
+
+    @pytest.mark.parametrize("tiling", ["square", "hex"])
+    def test_full_obs_all_tilings(self, tiling):
+        spec = _make_spec()
+        spec["tiling"]["type"] = tiling
+        env = MultiGridEnv(spec, tiling=tiling, render_mode="rgb_array",
+                           observability_mode="full")
+        obs, info = env.reset(seed=42)
+        assert len(env.state.visible_cells) == len(env.tiling.cells)
+
+
+class TestViewCone:
+    """View cone: agent only sees cells in front."""
+
+    def test_fewer_visible_than_total(self):
+        spec = _make_spec(width=8, height=8)
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=2,
+                           observability_mode="view_cone")
+        obs, info = env.reset(seed=42)
+        # With radius 2, should see fewer cells than total
+        assert len(env.state.visible_cells) < len(env.tiling.cells)
+        assert len(env.state.visible_cells) > 0
+        # Agent's own cell must always be visible
+        assert env.state.agent.cell_id in env.state.visible_cells
+
+    def test_visible_cells_change_on_turn(self):
+        spec = _make_spec(width=8, height=8)
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=3,
+                           observability_mode="view_cone")
+        obs, info = env.reset(seed=42)
+        visible_before = set(env.state.visible_cells)
+
+        # Turn right (action 3 = TURN_RIGHT)
+        env.step(3)
+        visible_after = set(env.state.visible_cells)
+
+        # Visible cells should differ after turning
+        assert visible_before != visible_after
+
+    @pytest.mark.parametrize("tiling", ["square", "hex"])
+    def test_view_cone_different_tilings(self, tiling):
+        spec = _make_spec(width=6, height=6)
+        spec["tiling"]["type"] = tiling
+        env = MultiGridEnv(spec, tiling=tiling, render_mode="rgb_array",
+                           partial_obs=True, obs_radius=2,
+                           observability_mode="view_cone")
+        obs, info = env.reset(seed=42)
+        assert len(env.state.visible_cells) < len(env.tiling.cells)
+        assert env.state.agent.cell_id in env.state.visible_cells
+
+
+class TestWallBlocking:
+    """Walls should block BFS visibility propagation."""
+
+    def test_wall_blocks_visibility(self):
+        # Place a wall object between agent and some cells
+        spec = _make_spec(width=7, height=7, objects=[
+            {"id": "wall_1", "type": "wall", "color": "grey",
+             "position": {"x": 0.5, "y": 0.3}},
+        ])
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=5,
+                           observability_mode="fog_of_war")
+        obs, info = env.reset(seed=42)
+
+        # The wall cell itself should be visible (walls are visible,
+        # just block propagation beyond them)
+        wall_cell = env.tiling.canonical_to_cell(0.5, 0.3)
+        if wall_cell in env.tiling.cells:
+            # Just check visibility is non-trivial (less than all cells)
+            assert len(env.state.visible_cells) < len(env.tiling.cells)
+
+    def test_closed_door_blocks(self):
+        spec = _make_spec(width=7, height=7, objects=[
+            {"id": "door_1", "type": "door", "color": "red",
+             "position": {"x": 0.5, "y": 0.3}, "is_locked": True},
+        ])
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=5,
+                           observability_mode="fog_of_war")
+        obs, info = env.reset(seed=42)
+
+        # With a locked door blocking, should see fewer cells
+        assert len(env.state.visible_cells) < len(env.tiling.cells)
+
+
+class TestFogOfWar:
+    """Fog of war: explored set grows monotonically."""
+
+    def test_explored_grows_on_movement(self):
+        spec = _make_spec(width=8, height=8)
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=2,
+                           observability_mode="fog_of_war")
+        obs, info = env.reset(seed=42)
+        explored_before = len(env.state.explored_cells)
+
+        # Move forward (action 0 = FORWARD)
+        env.step(0)
+        explored_after = len(env.state.explored_cells)
+
+        # Explored should be >= (monotonically growing)
+        assert explored_after >= explored_before
+
+    def test_explored_never_shrinks(self):
+        spec = _make_spec(width=8, height=8)
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=2,
+                           observability_mode="fog_of_war")
+        obs, info = env.reset(seed=42)
+
+        # Take a sequence of actions and track explored
+        prev_explored = set(env.state.explored_cells)
+        actions = [0, 3, 0, 2, 0, 3, 0]  # forward, turn_right, forward, etc.
+        for action in actions:
+            env.step(action)
+            current_explored = set(env.state.explored_cells)
+            # Previous explored must be a subset of current
+            assert prev_explored.issubset(current_explored), \
+                f"Explored cells shrank: lost {prev_explored - current_explored}"
+            prev_explored = current_explored
+
+    def test_fog_of_war_omnidirectional(self):
+        """Fog of war should be omnidirectional (no facing filter)."""
+        spec = _make_spec(width=6, height=6)
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=2,
+                           observability_mode="fog_of_war")
+        obs, info = env.reset(seed=42)
+        visible_facing_0 = set(env.state.visible_cells)
+
+        # Turn right
+        env.step(3)
+        visible_after_turn = set(env.state.visible_cells)
+
+        # In fog of war mode (omnidirectional), visible cells should be the same
+        # after turning (only position matters, not facing)
+        assert visible_facing_0 == visible_after_turn
+
+
+class TestRendering:
+    """Partial observability should affect rendered images."""
+
+    def test_partial_obs_renders_differently(self):
+        spec = _make_spec(width=8, height=8)
+
+        # Full observability render
+        env_full = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                                observability_mode="full")
+        env_full.reset(seed=42)
+        img_full = env_full.render()
+
+        # Partial observability render
+        env_partial = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                                   partial_obs=True, obs_radius=2,
+                                   observability_mode="view_cone")
+        env_partial.reset(seed=42)
+        img_partial = env_partial.render()
+
+        # Images should differ (partial obs hides some cells)
+        assert not np.array_equal(img_full, img_partial)
+
+    def test_render_produces_valid_image(self):
+        spec = _make_spec()
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=2,
+                           observability_mode="fog_of_war")
+        obs, info = env.reset(seed=42)
+        img = env.render()
+        assert img.shape == (640, 640, 3)
+        assert img.dtype == np.uint8
+
+
+class TestVisibilityHelpers:
+    """Unit tests for visibility module helper functions."""
+
+    def test_facing_to_angle_square(self):
+        from multigrid.tilings import SquareTiling
+        tiling = SquareTiling()
+        tiling.generate_graph(5, 5, seed=0)
+
+        # Square: 0=N (up), 1=E (right), 2=S (down), 3=W (left)
+        assert abs(_facing_to_angle(0, tiling) - (-math.pi / 2)) < 0.01
+        assert abs(_facing_to_angle(1, tiling) - 0.0) < 0.01
+
+    def test_is_in_view_cone_directly_ahead(self):
+        agent_pos = (0.5, 0.5)
+        cell_ahead = (0.5, 0.3)  # North (up = -y)
+        facing = -math.pi / 2  # North
+
+        assert _is_in_view_cone(agent_pos, cell_ahead, facing, math.pi / 2)
+
+    def test_is_in_view_cone_behind(self):
+        agent_pos = (0.5, 0.5)
+        cell_behind = (0.5, 0.8)  # South (down = +y)
+        facing = -math.pi / 2  # North
+
+        assert not _is_in_view_cone(agent_pos, cell_behind, facing, math.pi / 4)
+
+    def test_is_cell_blocking_empty(self):
+        """Empty cell should not block."""
+        from multigrid.world import WorldState
+        from multigrid.tilings import SquareTiling
+
+        tiling = SquareTiling()
+        tiling.generate_graph(5, 5, seed=0)
+        state = WorldState(tiling)
+
+        cell_id = list(tiling.cells.keys())[0]
+        assert not _is_cell_blocking(cell_id, state)
+
+
+class TestInfoDict:
+    """Test that info dict includes visibility counts."""
+
+    def test_info_has_visibility_counts(self):
+        spec = _make_spec()
+        env = MultiGridEnv(spec, tiling="square", render_mode="rgb_array",
+                           partial_obs=True, obs_radius=2,
+                           observability_mode="view_cone")
+        obs, info = env.reset(seed=42)
+
+        assert "visible_cells" in info
+        assert "explored_cells" in info
+        assert "total_cells" in info
+        assert info["visible_cells"] > 0
+        assert info["explored_cells"] > 0
+        assert info["total_cells"] == len(env.tiling.cells)
diff --git a/src/v1_1/tests/test_partial_observability.py b/src/v1_1/tests/test_partial_observability.py
new file mode 100644
index 00000000..f9105999
--- /dev/null
+++ b/src/v1_1/tests/test_partial_observability.py
@@ -0,0 +1,344 @@
+"""Tests for partial observability (view cone and fog of war)."""
+
+import pytest
+import sys
+import os
+from pathlib import Path
+
+_v1_1_dir = str(Path(__file__).resolve().parent.parent)
+if _v1_1_dir not in sys.path:
+    sys.path.insert(0, _v1_1_dir)
+
+from gridworld.task_spec import TaskSpecification, Rules
+from gridworld.task_parser import TaskParser
+from gridworld.backends.minigrid_backend import MiniGridBackend
+from gridworld.actions import MiniGridActions
+
+
+# --- Fixtures ---
+
+@pytest.fixture
+def full_obs_spec():
+    """Task with full observability (default)."""
+    return TaskSpecification.from_dict({
+        "task_id": "test_full_obs",
+        "seed": 42,
+        "difficulty_tier": 1,
+        "maze": {
+            "dimensions": [8, 8],
+            "walls": [],
+            "start": [1, 1],
+            "goal": [6, 6],
+        },
+        "mechanisms": {"keys": [], "doors": [], "switches": [],
+                       "gates": [], "blocks": [], "teleporters": [], "hazards": []},
+        "rules": {"key_consumption": True, "switch_type": "toggle",
+                  "hidden_mechanisms": [], "observability": "full"},
+        "goal": {"type": "reach_position", "target": [6, 6]},
+        "max_steps": 50,
+    })
+
+
+@pytest.fixture
+def view_cone_spec():
+    """Task with view cone partial observability."""
+    return TaskSpecification.from_dict({
+        "task_id": "test_view_cone",
+        "seed": 42,
+        "difficulty_tier": 5,
+        "maze": {
+            "dimensions": [10, 10],
+            "walls": [[5, 1], [5, 2], [5, 3], [5, 5], [5, 6], [5, 7], [5, 8]],
+            "start": [1, 1],
+            "goal": [8, 8],
+        },
+        "mechanisms": {"keys": [], "doors": [], "switches": [],
+                       "gates": [], "blocks": [], "teleporters": [], "hazards": []},
+        "rules": {"key_consumption": True, "switch_type": "toggle",
+                  "hidden_mechanisms": [], "observability": "view_cone", "view_size": 5},
+        "goal": {"type": "reach_position", "target": [8, 8]},
+        "max_steps": 100,
+    })
+
+
+@pytest.fixture
+def fog_of_war_spec():
+    """Task with fog of war partial observability."""
+    return TaskSpecification.from_dict({
+        "task_id": "test_fog_of_war",
+        "seed": 42,
+        "difficulty_tier": 5,
+        "maze": {
+            "dimensions": [10, 10],
+            "walls": [],
+            "start": [1, 1],
+            "goal": [8, 8],
+        },
+        "mechanisms": {"keys": [], "doors": [], "switches": [],
+                       "gates": [], "blocks": [], "teleporters": [], "hazards": []},
+        "rules": {"key_consumption": True, "switch_type": "toggle",
+                  "hidden_mechanisms": [], "observability": "fog_of_war", "view_size": 5},
+        "goal": {"type": "reach_position", "target": [8, 8]},
+        "max_steps": 100,
+    })
+
+
+# --- TaskSpec Rules tests ---
+
+class TestObservabilitySpec:
+    """Test that observability is correctly parsed from task specs."""
+
+    def test_default_observability_is_full(self):
+        rules = Rules.from_dict({})
+        assert rules.observability == "full"
+        assert rules.view_size == 7
+
+    def test_view_cone_parsed(self):
+        rules = Rules.from_dict({"observability": "view_cone", "view_size": 5})
+        assert rules.observability == "view_cone"
+        assert rules.view_size == 5
+
+    def test_fog_of_war_parsed(self):
+        rules = Rules.from_dict({"observability": "fog_of_war", "view_size": 9})
+        assert rules.observability == "fog_of_war"
+        assert rules.view_size == 9
+
+    def test_observability_roundtrip(self, view_cone_spec):
+        """Serialize and deserialize preserves observability."""
+        d = view_cone_spec.to_dict()
+        spec2 = TaskSpecification.from_dict(d)
+        assert spec2.rules.observability == "view_cone"
+        assert spec2.rules.view_size == 5
+
+
+# --- Full observability tests ---
+
+class TestFullObservability:
+    """Verify that full observability mode works as before (no regression)."""
+
+    def test_full_obs_see_through_walls(self, full_obs_spec):
+        """Full obs mode should have see_through_walls=True."""
+        parser = TaskParser(render_mode="rgb_array")
+        env = parser.parse(full_obs_spec)
+        assert env.see_through_walls is True
+
+    def test_full_obs_backend_state(self, full_obs_spec):
+        """Full obs mode should have observability_mode='full' in GridState."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(full_obs_spec)
+        _, state, _ = backend.reset(seed=42)
+        assert state.observability_mode == "full"
+        assert len(state.visible_cells) == 0  # Not tracked in full mode
+        assert len(state.explored_cells) == 0
+
+    def test_full_obs_renders(self, full_obs_spec):
+        """Full obs mode renders a valid RGB image."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(full_obs_spec)
+        obs, _, _ = backend.reset(seed=42)
+        assert obs.shape[2] == 3
+        assert obs.max() > 0
+
+
+# --- View cone tests ---
+
+class TestViewCone:
+    """Test MiniGrid native view cone partial observability."""
+
+    def test_view_cone_env_config(self, view_cone_spec):
+        """View cone mode should configure env with see_through_walls=False."""
+        parser = TaskParser(render_mode="rgb_array")
+        env = parser.parse(view_cone_spec)
+        assert env.see_through_walls is False
+        assert env.agent_view_size == 5
+
+    def test_view_cone_observation_size(self, view_cone_spec):
+        """View cone symbolic observation should match view_size."""
+        parser = TaskParser(render_mode="rgb_array")
+        env = parser.parse(view_cone_spec)
+        obs = env.gen_obs()
+        # MiniGrid observation image shape is (view_size, view_size, 3)
+        assert obs["image"].shape == (5, 5, 3)
+
+    def test_view_cone_visible_cells(self, view_cone_spec):
+        """View cone should report a limited set of visible cells."""
+        parser = TaskParser(render_mode="rgb_array")
+        env = parser.parse(view_cone_spec)
+        visible = env.get_visible_cells()
+        # With view_size=5 and see_through_walls=False, visible cells
+        # should be significantly fewer than total interior cells
+        total_interior = (10 - 2) * (10 - 2)  # 64
+        assert len(visible) > 0
+        assert len(visible) < total_interior
+
+    def test_view_cone_backend_state(self, view_cone_spec):
+        """Backend GridState should include visible cells for view_cone mode."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(view_cone_spec)
+        _, state, _ = backend.reset(seed=42)
+        assert state.observability_mode == "view_cone"
+        assert len(state.visible_cells) > 0
+
+    def test_view_cone_visibility_changes_on_turn(self, view_cone_spec):
+        """Turning should change visible cells (view cone rotates with agent)."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(view_cone_spec)
+        _, state0, _ = backend.reset(seed=42)
+        visible_before = state0.visible_cells
+
+        # Turn left
+        _, _, _, _, state1, _ = backend.step(MiniGridActions.TURN_LEFT)
+        visible_after = state1.visible_cells
+
+        # After turning, some cells should be different
+        assert visible_before != visible_after
+
+    def test_view_cone_renders(self, view_cone_spec):
+        """View cone mode should render with highlight on visible cells."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(view_cone_spec)
+        obs, _, _ = backend.reset(seed=42)
+        assert obs.shape[2] == 3
+        assert obs.max() > 0
+
+    def test_view_cone_walls_block_vision(self, view_cone_spec):
+        """Walls should block vision in view cone mode."""
+        parser = TaskParser(render_mode="rgb_array")
+        env = parser.parse(view_cone_spec)
+        # Agent starts at (1,1) facing right. Wall at (5,1) should block
+        # vision to cells at x>=6 along y=1
+        visible = env.get_visible_cells()
+        # Cells behind the wall at x=5 should not be visible
+        behind_wall = {c for c in visible if c[0] > 5 and c[1] == 1}
+        assert len(behind_wall) == 0, f"Should not see behind wall: {behind_wall}"
+
+
+# --- Fog of war tests ---
+
+class TestFogOfWar:
+    """Test fog of war observability mode."""
+
+    def test_fog_of_war_env_config(self, fog_of_war_spec):
+        """Fog of war should configure env with see_through_walls=False."""
+        parser = TaskParser(render_mode="rgb_array")
+        env = parser.parse(fog_of_war_spec)
+        assert env.see_through_walls is False
+        assert env.agent_view_size == 5
+
+    def test_fog_of_war_initial_explored(self, fog_of_war_spec):
+        """After reset, fog of war should have initial visible area explored."""
+        parser = TaskParser(render_mode="rgb_array")
+        env = parser.parse(fog_of_war_spec)
+        # After reset, explored cells should be the initial visible area
+        assert len(env.explored_cells) > 0
+
+    def test_fog_of_war_explored_grows(self, fog_of_war_spec):
+        """Moving should reveal new cells in fog of war mode."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(fog_of_war_spec)
+        _, state0, _ = backend.reset(seed=42)
+        initial_explored = len(state0.explored_cells)
+
+        # Move forward a few steps (agent starts at (1,1) facing right)
+        for _ in range(3):
+            backend.step(MiniGridActions.MOVE_FORWARD)
+        _, _, _, _, state1, _ = backend.step(MiniGridActions.MOVE_FORWARD)
+
+        # Should have explored more cells
+        assert len(state1.explored_cells) >= initial_explored
+
+    def test_fog_of_war_explored_never_shrinks(self, fog_of_war_spec):
+        """Explored cells should never decrease (monotonically growing)."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(fog_of_war_spec)
+        _, state, _ = backend.reset(seed=42)
+        prev_explored = len(state.explored_cells)
+
+        # Take various actions
+        actions = [
+            MiniGridActions.MOVE_FORWARD,
+            MiniGridActions.MOVE_FORWARD,
+            MiniGridActions.TURN_LEFT,
+            MiniGridActions.MOVE_FORWARD,
+            MiniGridActions.TURN_RIGHT,
+            MiniGridActions.MOVE_FORWARD,
+        ]
+        for action in actions:
+            _, _, _, _, state, _ = backend.step(action)
+            current_explored = len(state.explored_cells)
+            assert current_explored >= prev_explored, \
+                f"Explored cells decreased from {prev_explored} to {current_explored}"
+            prev_explored = current_explored
+
+    def test_fog_of_war_backend_state(self, fog_of_war_spec):
+        """Backend GridState should include explored cells for fog_of_war."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(fog_of_war_spec)
+        _, state, _ = backend.reset(seed=42)
+        assert state.observability_mode == "fog_of_war"
+        assert len(state.explored_cells) > 0
+        assert len(state.visible_cells) > 0
+        # Explored should be superset of visible
+        assert state.visible_cells <= state.explored_cells
+
+
+# --- Task file loading tests ---
+
+class TestPartialObsTaskFiles:
+    """Test loading actual task files with partial observability."""
+
+    def test_hidden_switch_has_view_cone(self):
+        """tier5/hidden_switch_001.json should have view_cone observability."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier5" / "hidden_switch_001.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+        spec = TaskSpecification.from_json(str(task_path))
+        assert spec.rules.observability == "view_cone"
+        assert spec.rules.view_size == 5
+
+    def test_memory_has_fog_of_war(self):
+        """tier5/memory_003.json should have fog_of_war observability."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier5" / "memory_003.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+        spec = TaskSpecification.from_json(str(task_path))
+        assert spec.rules.observability == "fog_of_war"
+        assert spec.rules.view_size == 7
+
+    def test_hidden_switch_playable_with_view_cone(self):
+        """hidden_switch_001 should be playable with view cone."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier5" / "hidden_switch_001.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+        backend = MiniGridBackend(render_mode="rgb_array")
+        spec = TaskSpecification.from_json(str(task_path))
+        backend.configure(spec)
+        obs, state, info = backend.reset(seed=42)
+        assert obs.shape[2] == 3
+        assert state.observability_mode == "view_cone"
+        assert len(state.visible_cells) > 0
+
+        # Take a step to verify it works
+        obs, _, _, _, state, _ = backend.step(MiniGridActions.MOVE_FORWARD)
+        assert obs.shape[2] == 3
+
+    def test_memory_playable_with_fog_of_war(self):
+        """memory_003 should be playable with fog of war."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier5" / "memory_003.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+        backend = MiniGridBackend(render_mode="rgb_array")
+        spec = TaskSpecification.from_json(str(task_path))
+        backend.configure(spec)
+        obs, state, info = backend.reset(seed=42)
+        assert obs.shape[2] == 3
+        assert state.observability_mode == "fog_of_war"
+        assert len(state.explored_cells) > 0
+
+    def test_existing_tasks_default_to_full(self):
+        """Tasks without observability field should default to full."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier1" / "maze_simple_001.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+        spec = TaskSpecification.from_json(str(task_path))
+        assert spec.rules.observability == "full"
diff --git a/src/v1_1/tests/test_performance.py b/src/v1_1/tests/test_performance.py
new file mode 100644
index 00000000..5b3999f5
--- /dev/null
+++ b/src/v1_1/tests/test_performance.py
@@ -0,0 +1,263 @@
+# test_performance.py
+
+import pytest
+import time
+import sys
+import os
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from multigrid.env import MultiGridEnv, Action
+
+
+def create_task(grid_size=10, max_steps=100):
+    """Helper to create a task spec for performance testing."""
+    return {
+        "task_id": "perf_test",
+        "seed": 42,
+        "scene": {
+            "bounds": {"width": 1.0, "height": 1.0},
+            "objects": [
+                {
+                    "id": "cube_red",
+                    "type": "movable",
+                    "color": "red",
+                    "position": {"x": 0.5, "y": 0.5},
+                    "size": 0.1
+                }
+            ],
+            "agent": {
+                "position": {"x": 0.1, "y": 0.1},
+                "facing": 0
+            }
+        },
+        "goal": {
+            "predicate": "reach_position",
+            "position": {"x": 0.9, "y": 0.9}
+        },
+        "limits": {"max_steps": max_steps},
+        "tiling": {"type": "square", "grid_size": {"width": grid_size, "height": grid_size}}
+    }
+
+
+class TestPerformance:
+    """Performance benchmark tests."""
+
+    @pytest.mark.parametrize("grid_size", [10, 25, 50])
+    @pytest.mark.parametrize("tiling", ["square", "hex", "triangle"])
+    def test_reset_time(self, grid_size, tiling):
+        """Reset should complete within time budget."""
+        task = create_task(grid_size=grid_size)
+        task["tiling"]["type"] = tiling
+
+        env = MultiGridEnv(task, tiling=tiling)
+
+        times = []
+        for _ in range(10):
+            start = time.time()
+            env.reset()
+            elapsed = time.time() - start
+            times.append(elapsed)
+
+        avg_time = sum(times) / len(times)
+        max_time = max(times)
+
+        # Soft guidelines from spec
+        if grid_size <= 25:
+            assert avg_time < 0.2, \
+                f"{tiling} grid {grid_size}x{grid_size} reset took {avg_time:.3f}s (should be < 0.2s)"
+        else:
+            assert avg_time < 0.7, \
+                f"{tiling} grid {grid_size}x{grid_size} reset took {avg_time:.3f}s (should be < 0.7s)"
+
+        print(f"\n{tiling} {grid_size}x{grid_size}: avg={avg_time*1000:.1f}ms, max={max_time*1000:.1f}ms")
+
+    @pytest.mark.parametrize("tiling", ["square", "hex", "triangle"])
+    def test_step_throughput(self, tiling):
+        """Step should achieve target throughput."""
+        task = create_task(grid_size=20, max_steps=1100)
+        task["tiling"]["type"] = tiling
+
+        env = MultiGridEnv(task, tiling=tiling)
+        env.reset()
+
+        # Measure throughput over 1000 steps
+        start = time.time()
+        for _ in range(1000):
+            env.step(Action.TURN_RIGHT)
+        elapsed = time.time() - start
+
+        steps_per_second = 1000 / elapsed
+
+        # Soft guidelines - triangle grid has more cells and is expected to be slower
+        if tiling == "triangle":
+            assert steps_per_second > 100, \
+                f"{tiling} achieved {steps_per_second:.0f} steps/sec (should be > 100)"
+        else:
+            assert steps_per_second > 700, \
+                f"{tiling} achieved {steps_per_second:.0f} steps/sec (should be > 700)"
+
+        print(f"\n{tiling} throughput: {steps_per_second:.0f} steps/sec")
+
+    def test_large_grid_scalability(self):
+        """Test that very large grids are still performant."""
+        task = create_task(grid_size=100)
+        env = MultiGridEnv(task, tiling="square")
+
+        # Reset time
+        start = time.time()
+        env.reset()
+        reset_time = time.time() - start
+
+        assert reset_time < 2.0, \
+            f"Large grid (100x100) reset took {reset_time:.2f}s (should be < 2.0s)"
+
+        # Step throughput - with rendering this will be slower
+        start = time.time()
+        for _ in range(100):
+            env.step(Action.FORWARD)
+        step_time = time.time() - start
+
+        # Relaxed constraint - with rendering overhead
+        assert step_time < 2.0, \
+            f"Large grid (100x100) 100 steps took {step_time:.2f}s (should be < 2.0s)"
+
+        print(f"\n100x100 grid: reset={reset_time*1000:.0f}ms, 100 steps={step_time*1000:.0f}ms")
+
+    @pytest.mark.parametrize("tiling", ["square", "hex", "triangle"])
+    def test_memory_efficiency(self, tiling):
+        """Test that environment instances don't consume excessive memory."""
+        import psutil
+        import os
+
+        process = psutil.Process(os.getpid())
+        initial_memory = process.memory_info().rss / 1024 / 1024  # MB
+
+        # Create multiple environment instances
+        envs = []
+        for i in range(10):
+            task = create_task(grid_size=20)
+            task["tiling"]["type"] = tiling
+            task["task_id"] = f"test_{i}"
+
+            env = MultiGridEnv(task, tiling=tiling)
+            env.reset()
+            envs.append(env)
+
+        final_memory = process.memory_info().rss / 1024 / 1024  # MB
+        memory_per_env = (final_memory - initial_memory) / 10
+
+        # Each environment should use less than 10MB
+        assert memory_per_env < 10, \
+            f"{tiling} env uses {memory_per_env:.1f}MB (should be < 10MB)"
+
+        print(f"\n{tiling} memory per env: {memory_per_env:.1f}MB")
+
+        # Clean up
+        del envs
+
+    def test_rapid_reset_performance(self):
+        """Test rapid reset/step cycles."""
+        task = create_task(grid_size=10, max_steps=5)
+        env = MultiGridEnv(task, tiling="square")
+
+        start = time.time()
+        for _ in range(100):
+            env.reset()
+            for _ in range(5):
+                env.step(Action.TURN_RIGHT)
+        elapsed = time.time() - start
+
+        episodes_per_second = 100 / elapsed
+
+        assert episodes_per_second > 50, \
+            f"Rapid reset achieved {episodes_per_second:.0f} episodes/sec (should be > 50)"
+
+        print(f"\nRapid reset: {episodes_per_second:.0f} episodes/sec")
+
+
+class TestScalability:
+    """Tests for system scalability."""
+
+    @pytest.mark.parametrize("num_objects", [1, 10, 50])
+    def test_many_objects(self, num_objects):
+        """Test performance with many objects in scene."""
+        task = create_task(grid_size=20)
+
+        # Add many objects
+        objects = []
+        for i in range(num_objects):
+            x = 0.1 + (i % 5) * 0.15
+            y = 0.1 + (i // 5) * 0.15
+            objects.append({
+                "id": f"cube_{i}",
+                "type": "movable",
+                "color": "red" if i % 2 == 0 else "blue",
+                "position": {"x": x, "y": y},
+                "size": 0.1
+            })
+        task["scene"]["objects"] = objects
+
+        env = MultiGridEnv(task, tiling="square")
+
+        # Measure reset time
+        start = time.time()
+        env.reset()
+        reset_time = time.time() - start
+
+        # Reset time should scale reasonably
+        expected_time = 0.05 + (num_objects * 0.002)  # Base + per-object
+        assert reset_time < expected_time, \
+            f"Reset with {num_objects} objects took {reset_time:.3f}s"
+
+        # Measure step time
+        start = time.time()
+        for _ in range(100):
+            env.step(Action.TURN_RIGHT)
+        step_time = time.time() - start
+
+        # Step time should not be significantly affected by number of objects
+        assert step_time < 0.15, \
+            f"100 steps with {num_objects} objects took {step_time:.3f}s"
+
+        print(f"\n{num_objects} objects: reset={reset_time*1000:.1f}ms, 100 steps={step_time*1000:.1f}ms")
+
+    def test_concurrent_environments(self):
+        """Test that multiple environments can coexist without interference."""
+        tasks = []
+        envs = []
+
+        # Create 5 different environments with varying seeds and agent positions
+        for i in range(5):
+            task = create_task(grid_size=10)
+            task["seed"] = 100 + i
+            task["task_id"] = f"concurrent_{i}"
+            # Vary agent start position to ensure different states
+            x = 0.1 + (i * 0.15)
+            y = 0.1 + (i * 0.15)
+            task["scene"]["agent"]["position"] = {"x": x, "y": y}
+            tasks.append(task)
+
+            env = MultiGridEnv(task, tiling="square")
+            env.reset(seed=100 + i)
+            envs.append(env)
+
+        # Step each environment independently
+        for i, env in enumerate(envs):
+            for _ in range(10):
+                env.step(Action.FORWARD)
+
+        # Verify environments maintain independent states
+        # Check that at least some environments have different states
+        different_states = 0
+        for i in range(len(envs)):
+            for j in range(i + 1, len(envs)):
+                if envs[i].state.agent.cell_id != envs[j].state.agent.cell_id or \
+                   envs[i].state.agent.facing != envs[j].state.agent.facing:
+                    different_states += 1
+
+        # At least half of the environment pairs should have different states
+        total_pairs = len(envs) * (len(envs) - 1) // 2
+        assert different_states >= total_pairs // 2, \
+            f"Only {different_states}/{total_pairs} environment pairs have different states"
diff --git a/src/v1_1/tests/test_regression.py b/src/v1_1/tests/test_regression.py
new file mode 100644
index 00000000..c6507779
--- /dev/null
+++ b/src/v1_1/tests/test_regression.py
@@ -0,0 +1,94 @@
+# test_regression.py
+
+"""
+Regression tests for previously-fixed bugs in MultiGrid.
+
+E.7.1: Hex odd-row neighbor symmetry
+E.7.2: Triangle facing validity after movement
+"""
+
+import sys
+import os
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from multigrid.tilings import HexTiling, TriangleTiling
+from multigrid.env import MultiGridEnv, Action
+
+
+class TestRegression:
+    """Regression tests for previously-identified edge-case bugs."""
+
+    def test_hex_neighbor_at_odd_row(self):
+        """Hex cells at odd rows have correct bidirectional neighbor links.
+
+        Validates odd-r offset coordinate neighbor computation: every
+        neighbor link must have a reverse link back to the original cell.
+        """
+        tiling = HexTiling()
+        tiling.generate_graph(8, 8)
+
+        # Pick all cells at odd rows
+        odd_row_cells = [
+            cid for cid, cell in tiling.cells.items() if cell.row % 2 == 1
+        ]
+        assert len(odd_row_cells) > 0, "Should have cells at odd rows"
+
+        for cell_id in odd_row_cells:
+            cell = tiling.cells[cell_id]
+            for direction, neighbor_id in cell.neighbors.items():
+                # Neighbor must exist in the tiling
+                assert neighbor_id in tiling.cells, \
+                    f"Neighbor {neighbor_id} of {cell_id} not in tiling"
+
+                # Neighbor must have a reverse link back
+                neighbor_cell = tiling.cells[neighbor_id]
+                reverse_found = cell_id in neighbor_cell.neighbors.values()
+                assert reverse_found, (
+                    f"Cell {cell_id} links to {neighbor_id} via {direction}, "
+                    f"but {neighbor_id} has no reverse link back"
+                )
+
+    def test_triangle_facing_after_move(self):
+        """Agent facing remains valid after movement on triangle grid.
+
+        Triangle tiling has 3 directions. Moving forward must not corrupt
+        the facing index outside the valid range.
+        """
+        task = {
+            "task_id": "test_tri_facing",
+            "seed": 42,
+            "scene": {
+                "bounds": {"width": 1.0, "height": 1.0},
+                "objects": [],
+                "agent": {"position": {"x": 0.5, "y": 0.5}, "facing": 0},
+            },
+            "goal": {"predicate": "reach_position", "position": {"x": 0.9, "y": 0.9}},
+            "limits": {"max_steps": 50},
+            "tiling": {"type": "triangle", "grid_size": {"width": 5, "height": 5}},
+        }
+
+        env = MultiGridEnv(task, tiling="triangle")
+        env.reset()
+
+        num_directions = len(env.tiling.directions)
+
+        # Execute a series of movements and turns
+        actions = [
+            Action.FORWARD,
+            Action.TURN_RIGHT,
+            Action.FORWARD,
+            Action.TURN_LEFT,
+            Action.FORWARD,
+            Action.TURN_RIGHT,
+            Action.TURN_RIGHT,
+            Action.FORWARD,
+        ]
+
+        for i, action in enumerate(actions):
+            env.step(action)
+            facing = env.state.agent.facing
+            assert 0 <= facing < num_directions, (
+                f"After action {action.name} (step {i+1}), facing={facing} "
+                f"is outside valid range [0, {num_directions})"
+            )
diff --git a/src/v1_1/tests/test_teleporters.py b/src/v1_1/tests/test_teleporters.py
new file mode 100644
index 00000000..a10c54b4
--- /dev/null
+++ b/src/v1_1/tests/test_teleporters.py
@@ -0,0 +1,208 @@
+"""Tests for teleporter functionality in MiniGrid backend."""
+
+import pytest
+import sys
+import os
+from pathlib import Path
+
+_v1_1_dir = str(Path(__file__).resolve().parent.parent)
+if _v1_1_dir not in sys.path:
+    sys.path.insert(0, _v1_1_dir)
+
+from gridworld.task_spec import TaskSpecification
+from gridworld.task_parser import TaskParser
+from gridworld.backends.minigrid_backend import MiniGridBackend
+from gridworld.actions import MiniGridActions
+from gridworld.custom_env import TeleporterObj
+
+
+@pytest.fixture
+def teleporter_spec():
+    """Create a simple task with a teleporter."""
+    return TaskSpecification.from_dict({
+        "task_id": "test_teleporter",
+        "seed": 42,
+        "difficulty_tier": 5,
+        "maze": {
+            "dimensions": [8, 8],
+            "walls": [],
+            "start": [1, 1],
+            "goal": [6, 6],
+        },
+        "mechanisms": {
+            "teleporters": [
+                {
+                    "id": "tp1",
+                    "position_a": [2, 1],
+                    "position_b": [5, 5],
+                    "bidirectional": True,
+                }
+            ]
+        },
+        "goal": {"type": "reach_position", "target": [6, 6]},
+        "max_steps": 50,
+    })
+
+
+@pytest.fixture
+def oneway_teleporter_spec():
+    """Create a task with a one-way teleporter."""
+    return TaskSpecification.from_dict({
+        "task_id": "test_oneway_teleporter",
+        "seed": 42,
+        "difficulty_tier": 5,
+        "maze": {
+            "dimensions": [8, 8],
+            "walls": [],
+            "start": [1, 1],
+            "goal": [6, 6],
+        },
+        "mechanisms": {
+            "teleporters": [
+                {
+                    "id": "tp1",
+                    "position_a": [2, 1],
+                    "position_b": [5, 5],
+                    "bidirectional": False,
+                }
+            ]
+        },
+        "goal": {"type": "reach_position", "target": [6, 6]},
+        "max_steps": 50,
+    })
+
+
+class TestTeleporterValidation:
+    """Test teleporter position validation in task_spec."""
+
+    def test_valid_teleporter_passes_validation(self, teleporter_spec):
+        is_valid, errors = teleporter_spec.validate()
+        assert is_valid, f"Validation errors: {errors}"
+
+    def test_oob_teleporter_a_fails(self):
+        spec = TaskSpecification.from_dict({
+            "task_id": "test",
+            "seed": 42,
+            "difficulty_tier": 5,
+            "maze": {"dimensions": [8, 8], "walls": [], "start": [1, 1], "goal": [6, 6]},
+            "mechanisms": {
+                "teleporters": [{"id": "tp", "position_a": [10, 10], "position_b": [3, 3]}]
+            },
+            "goal": {"type": "reach_position", "target": [6, 6]},
+            "max_steps": 50,
+        })
+        is_valid, errors = spec.validate()
+        assert not is_valid
+        assert any("Teleporter" in e and "endpoint A" in e for e in errors)
+
+    def test_oob_teleporter_b_fails(self):
+        spec = TaskSpecification.from_dict({
+            "task_id": "test",
+            "seed": 42,
+            "difficulty_tier": 5,
+            "maze": {"dimensions": [8, 8], "walls": [], "start": [1, 1], "goal": [6, 6]},
+            "mechanisms": {
+                "teleporters": [{"id": "tp", "position_a": [3, 3], "position_b": [10, 10]}]
+            },
+            "goal": {"type": "reach_position", "target": [6, 6]},
+            "max_steps": 50,
+        })
+        is_valid, errors = spec.validate()
+        assert not is_valid
+        assert any("Teleporter" in e and "endpoint B" in e for e in errors)
+
+
+class TestTeleporterPlacement:
+    """Test that teleporters are placed in the environment."""
+
+    def test_teleporter_objects_placed(self, teleporter_spec):
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(teleporter_spec)
+        obs, state, info = backend.reset(seed=42)
+
+        assert len(backend.env.teleporters) == 2  # Two endpoints
+        assert "tp1_a" in backend.env.teleporters
+        assert "tp1_b" in backend.env.teleporters
+
+    def test_teleporter_objects_are_correct_type(self, teleporter_spec):
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(teleporter_spec)
+        backend.reset(seed=42)
+
+        for tp in backend.env.teleporters.values():
+            assert isinstance(tp, TeleporterObj)
+
+    def test_bidirectional_partners(self, teleporter_spec):
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(teleporter_spec)
+        backend.reset(seed=42)
+
+        tp_a = backend.env.teleporters["tp1_a"]
+        tp_b = backend.env.teleporters["tp1_b"]
+        assert tp_a.partner is tp_b
+        assert tp_b.partner is tp_a
+
+    def test_oneway_partner(self, oneway_teleporter_spec):
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(oneway_teleporter_spec)
+        backend.reset(seed=42)
+
+        tp_a = backend.env.teleporters["tp1_a"]
+        tp_b = backend.env.teleporters["tp1_b"]
+        assert tp_a.partner is tp_b
+        assert tp_b.partner is None  # One-way: B doesn't teleport to A
+
+
+class TestTeleporterMechanics:
+    """Test teleporter step mechanics."""
+
+    def test_agent_teleports_on_step(self, teleporter_spec):
+        """Agent at (1,1) facing right, move forward to (2,1) which is teleporter A -> should teleport to (5,5)."""
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(teleporter_spec)
+        obs, state, info = backend.reset(seed=42)
+
+        # Agent starts at (1,1) facing right (dir=0)
+        assert state.agent_position == (1, 1)
+
+        # Move forward: agent goes to (2,1) where teleporter A is
+        obs, reward, term, trunc, state, info = backend.step(MiniGridActions.MOVE_FORWARD)
+
+        # Should have been teleported to (5,5)
+        assert state.agent_position == (5, 5), f"Expected (5,5), got {state.agent_position}"
+
+    def test_teleporter_cooldown_in_state(self, teleporter_spec):
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(teleporter_spec)
+        obs, state, info = backend.reset(seed=42)
+
+        # Check that teleporter cooldowns are tracked
+        assert "tp1_a" in state.teleporter_cooldowns
+        assert "tp1_b" in state.teleporter_cooldowns
+        assert state.teleporter_cooldowns["tp1_a"] == 0
+        assert state.teleporter_cooldowns["tp1_b"] == 0
+
+
+class TestTeleporterTaskFile:
+    """Test loading the tier5 teleporter task JSON."""
+
+    def test_load_teleporter_task(self):
+        task_path = Path(__file__).resolve().parent.parent / "gridworld" / "tasks" / "tier5" / "teleporter_004.json"
+        spec = TaskSpecification.from_json(str(task_path))
+        assert spec.task_id == "tier5_teleporter_004"
+        assert len(spec.mechanisms.teleporters) == 2
+
+    def test_teleporter_task_validates(self):
+        task_path = Path(__file__).resolve().parent.parent / "gridworld" / "tasks" / "tier5" / "teleporter_004.json"
+        spec = TaskSpecification.from_json(str(task_path))
+        is_valid, errors = spec.validate()
+        assert is_valid, f"Validation errors: {errors}"
+
+    def test_teleporter_task_runs(self):
+        task_path = Path(__file__).resolve().parent.parent / "gridworld" / "tasks" / "tier5" / "teleporter_004.json"
+        spec = TaskSpecification.from_json(str(task_path))
+        backend = MiniGridBackend(render_mode="rgb_array")
+        backend.configure(spec)
+        obs, state, info = backend.reset(seed=42)
+        assert state.agent_position == (1, 1)
+        assert len(backend.env.teleporters) == 4  # 2 teleporters * 2 endpoints
diff --git a/src/v1_1/tests/test_tiling_generation.py b/src/v1_1/tests/test_tiling_generation.py
new file mode 100644
index 00000000..2724d180
--- /dev/null
+++ b/src/v1_1/tests/test_tiling_generation.py
@@ -0,0 +1,85 @@
+# test_tiling_generation.py
+
+import pytest
+import sys
+import os
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from multigrid.tilings.square import SquareTiling
+from multigrid.tilings.hex import HexTiling
+from multigrid.tilings.triangle import TriangleTiling
+
+
+class TestTilingGeneration:
+    """Tests for tiling graph generation."""
+
+    @pytest.mark.parametrize("tiling_class,expected_dirs", [
+        (SquareTiling, 4),
+        (HexTiling, 6),
+        (TriangleTiling, 3),
+    ])
+    def test_direction_count(self, tiling_class, expected_dirs):
+        """Each tiling type has correct number of directions."""
+        tiling = tiling_class()
+        assert len(tiling.directions) == expected_dirs
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_cell_count(self, tiling_class):
+        """Grid generates expected number of cells."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=10, height=8, seed=42)
+
+        if tiling_class == SquareTiling:
+            assert len(cells) == 80  # 10 * 8
+        elif tiling_class == HexTiling:
+            assert len(cells) == 80  # Rectangular hex grid
+        elif tiling_class == TriangleTiling:
+            assert len(cells) == 480  # 10 * 8 * 6 (each hex subdivided into 6 triangles)
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_boundary_cells_have_fewer_neighbors(self, tiling_class):
+        """Cells at grid boundary have fewer neighbors than interior."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=5, height=5, seed=0)
+
+        # Corner cells should have minimum neighbors
+        # Interior cells should have maximum neighbors
+        neighbor_counts = [len(c.neighbors) for c in cells.values()]
+
+        assert min(neighbor_counts) < max(neighbor_counts)
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_adjacency_symmetry(self, tiling_class):
+        """If A neighbors B, then B neighbors A."""
+        tiling = tiling_class()
+        cells = tiling.generate_graph(width=5, height=5, seed=0)
+
+        for cell_id, cell in cells.items():
+            for direction, neighbor_id in cell.neighbors.items():
+                neighbor = cells[neighbor_id]
+                # Neighbor should have some direction pointing back
+                assert cell_id in neighbor.neighbors.values(), \
+                    f"Asymmetric: {cell_id} -> {neighbor_id} but not reverse"
+
+    @pytest.mark.parametrize("tiling_class", [
+        SquareTiling, HexTiling, TriangleTiling
+    ])
+    def test_seed_determinism(self, tiling_class):
+        """Same seed produces identical graph."""
+        tiling1 = tiling_class()
+        tiling2 = tiling_class()
+
+        cells1 = tiling1.generate_graph(10, 10, seed=12345)
+        cells2 = tiling2.generate_graph(10, 10, seed=12345)
+
+        assert set(cells1.keys()) == set(cells2.keys())
+        for cell_id in cells1:
+            assert cells1[cell_id].neighbors == cells2[cell_id].neighbors
diff --git a/src/v1_1/tests/test_vlm_sanity_check.py b/src/v1_1/tests/test_vlm_sanity_check.py
new file mode 100644
index 00000000..a69a9bbe
--- /dev/null
+++ b/src/v1_1/tests/test_vlm_sanity_check.py
@@ -0,0 +1,256 @@
+"""Tests for VLM vision sanity check module.
+
+Tests question generation and answer checking logic without requiring a VLM.
+Uses a mock ask function to simulate VLM responses.
+"""
+
+import pytest
+import sys
+import os
+from pathlib import Path
+
+_v1_1_dir = str(Path(__file__).resolve().parent.parent)
+if _v1_1_dir not in sys.path:
+    sys.path.insert(0, _v1_1_dir)
+
+from gridworld.task_spec import TaskSpecification
+from gridworld.backends.minigrid_backend import MiniGridBackend
+from gridworld.backends.base import GridState
+from vlm_sanity_check import (
+    generate_questions_for_task,
+    check_answer,
+    run_sanity_check,
+    VisionQuestion,
+)
+
+
+# --- Answer checking ---
+
+class TestCheckAnswer:
+    """Test the keyword matching logic."""
+
+    def test_exact_match(self):
+        passed, matched = check_answer("I see a blue triangle", ["blue", "triangle"])
+        assert passed
+        assert "blue" in matched
+        assert "triangle" in matched
+
+    def test_case_insensitive(self):
+        passed, matched = check_answer("BLUE TRIANGLE", ["blue", "triangle"])
+        assert passed
+
+    def test_partial_match_passes(self):
+        """At least one keyword match should pass."""
+        passed, matched = check_answer("I see something green", ["green", "square"])
+        assert passed
+        assert "green" in matched
+
+    def test_no_match_fails(self):
+        passed, matched = check_answer("I see nothing interesting", ["blue", "triangle"])
+        assert not passed
+        assert len(matched) == 0
+
+    def test_empty_answer(self):
+        passed, matched = check_answer("", ["blue"])
+        assert not passed
+
+    def test_keyword_in_longer_word(self):
+        """Keywords can match as substrings."""
+        passed, matched = check_answer("The triangle-shaped agent is blue", ["triangle"])
+        assert passed
+
+
+# --- Question generation ---
+
+class TestGenerateQuestions:
+    """Test question generation for different task types."""
+
+    @pytest.fixture
+    def simple_maze_spec(self):
+        return TaskSpecification.from_dict({
+            "task_id": "test_simple",
+            "seed": 42,
+            "difficulty_tier": 1,
+            "maze": {
+                "dimensions": [8, 8],
+                "walls": [[4, 1], [4, 2], [4, 3]],
+                "start": [1, 1],
+                "goal": [6, 6],
+            },
+            "mechanisms": {"keys": [], "doors": [], "switches": [],
+                           "gates": [], "blocks": [], "teleporters": [], "hazards": []},
+            "rules": {"key_consumption": True, "switch_type": "toggle"},
+            "goal": {"type": "reach_position", "target": [6, 6]},
+            "max_steps": 50,
+        })
+
+    @pytest.fixture
+    def complex_spec(self):
+        return TaskSpecification.from_dict({
+            "task_id": "test_complex",
+            "seed": 42,
+            "difficulty_tier": 3,
+            "maze": {
+                "dimensions": [10, 10],
+                "walls": [[5, 1], [5, 2]],
+                "start": [1, 1],
+                "goal": [8, 8],
+            },
+            "mechanisms": {
+                "keys": [{"id": "k1", "position": [2, 3], "color": "blue"}],
+                "doors": [{"id": "d1", "position": [5, 3], "requires_key": "blue"}],
+                "switches": [{"id": "s1", "position": [3, 5], "controls": ["g1"]}],
+                "gates": [{"id": "g1", "position": [5, 5]}],
+                "blocks": [],
+                "teleporters": [],
+                "hazards": [{"id": "h1", "position": [7, 7], "hazard_type": "lava"}],
+            },
+            "rules": {"key_consumption": True, "switch_type": "toggle"},
+            "goal": {"type": "reach_position", "target": [8, 8]},
+            "max_steps": 100,
+        })
+
+    def test_simple_maze_questions(self, simple_maze_spec):
+        """Simple maze should generate agent, goal, wall, and spatial questions."""
+        state = GridState(agent_position=(1, 1), agent_direction=0)
+        questions = generate_questions_for_task(simple_maze_spec, state)
+
+        categories = [q.category for q in questions]
+        assert "object_id" in categories
+        assert "spatial" in categories
+
+        # Should have at least: agent, goal, wall identification + spatial questions
+        assert len(questions) >= 5
+
+    def test_complex_task_has_more_questions(self, complex_spec):
+        """Tasks with more mechanisms should generate more questions."""
+        state = GridState(agent_position=(1, 1), agent_direction=0)
+        questions = generate_questions_for_task(complex_spec, state)
+
+        # Should have key, door, switch, hazard questions in addition to basics
+        q_texts = " ".join(q.question.lower() for q in questions)
+        assert "key" in q_texts
+        assert "door" in q_texts
+        assert "switch" in q_texts or "button" in q_texts
+        assert "hazard" in q_texts or "lava" in q_texts
+
+    def test_spatial_direction_question(self, simple_maze_spec):
+        """Should ask about agent direction."""
+        state = GridState(agent_position=(1, 1), agent_direction=0)  # facing right
+        questions = generate_questions_for_task(simple_maze_spec, state)
+
+        dir_questions = [q for q in questions if "direction" in q.question.lower() or "facing" in q.question.lower()]
+        assert len(dir_questions) > 0
+        # Agent faces right (dir=0), so expected keyword should be "right"
+        assert "right" in dir_questions[0].expected_keywords
+
+    def test_goal_relative_position(self, simple_maze_spec):
+        """Should ask where goal is relative to agent."""
+        # Agent at (1,1), goal at (6,6) → goal is below and to the right
+        state = GridState(agent_position=(1, 1), agent_direction=0)
+        questions = generate_questions_for_task(simple_maze_spec, state)
+
+        rel_questions = [q for q in questions if "relative" in q.question.lower()]
+        assert len(rel_questions) > 0
+        # Goal is at (6,6), agent at (1,1) → right (x: 6>1) and below (y: 6>1)
+        assert "right" in rel_questions[0].expected_keywords
+        assert "below" in rel_questions[0].expected_keywords
+
+    def test_no_key_question_without_keys(self, simple_maze_spec):
+        """Simple maze with no keys should NOT generate key questions."""
+        state = GridState(agent_position=(1, 1), agent_direction=0)
+        questions = generate_questions_for_task(simple_maze_spec, state)
+
+        key_questions = [q for q in questions if "key" in q.question.lower()]
+        assert len(key_questions) == 0
+
+
+# --- Mock VLM sanity check ---
+
+class TestMockSanityCheck:
+    """Test the full sanity check pipeline with mock VLM responses."""
+
+    def test_perfect_mock_vlm(self):
+        """A mock VLM that always answers correctly should get 100%."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier1" / "maze_simple_001.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+
+        def mock_ask(image, question):
+            # Return an answer that matches common keywords
+            return (
+                "I see a blue triangle agent facing right on a grid. "
+                "There is a green goal square. There are grey walls. "
+                "The grid appears to be about 8x8. "
+                "The goal is below and to the right of the agent."
+            )
+
+        report = run_sanity_check(str(task_path), mock_ask, "mock_perfect", verbose=False)
+        assert report.passed > 0
+        assert report.object_id_score > 0
+
+    def test_blind_mock_vlm(self):
+        """A mock VLM that returns garbage should score poorly."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier1" / "maze_simple_001.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+
+        def mock_ask(image, question):
+            return "I cannot process this image."
+
+        report = run_sanity_check(str(task_path), mock_ask, "mock_blind", verbose=False)
+        assert report.failed > 0
+        assert report.object_id_score < 1.0
+
+    def test_error_handling_mock(self):
+        """VLM errors should be captured gracefully."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier1" / "maze_simple_001.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+
+        def mock_ask(image, question):
+            raise ConnectionError("VLM server not available")
+
+        report = run_sanity_check(str(task_path), mock_ask, "mock_error", verbose=False)
+        # All should fail with errors
+        assert report.failed == report.total_questions
+        for r in report.results:
+            assert r.error is not None
+
+    def test_report_serialization(self):
+        """Report should serialize to dict cleanly."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier1" / "maze_simple_001.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+
+        def mock_ask(image, question):
+            return "Blue triangle agent on a grid with green goal."
+
+        report = run_sanity_check(str(task_path), mock_ask, "mock", verbose=False)
+        d = report.to_dict()
+        assert "model_name" in d
+        assert "task_id" in d
+        assert "results" in d
+        assert isinstance(d["results"], list)
+
+    def test_image_passed_to_vlm(self):
+        """The ask function should receive a valid RGB image."""
+        task_path = Path(_v1_1_dir) / "gridworld" / "tasks" / "tier2" / "single_key_001.json"
+        if not task_path.exists():
+            pytest.skip("Task file not found")
+
+        received_images = []
+
+        def mock_ask(image, question):
+            received_images.append(image)
+            return "blue triangle green goal red key"
+
+        report = run_sanity_check(str(task_path), mock_ask, "mock", verbose=False)
+
+        # All questions should have received the same image
+        assert len(received_images) == report.total_questions
+        for img in received_images:
+            assert img.ndim == 3
+            assert img.shape[2] == 3  # RGB
+            assert img.dtype.name == "uint8"
+            assert img.max() > 0  # Not blank
diff --git a/src/v1_1/visualize_all_tilings.py b/src/v1_1/visualize_all_tilings.py
new file mode 100644
index 00000000..7e2edd6e
--- /dev/null
+++ b/src/v1_1/visualize_all_tilings.py
@@ -0,0 +1,543 @@
+"""
+Visualization script for all MultiGrid tiling types.
+
+Generates PNG images of every tiling supported by the MultiGrid framework:
+  1. Square (4-connected)
+  2. Hexagonal (6-connected)
+  3. Triangular (3-connected)
+  4. 3-4-6-4 Rhombitrihexagonal (mixed 3/4/6 connected)
+  5. 4-8-8 Truncated Square (mixed 4/8 connected)
+
+Each tiling is rendered with cells colored by polygon type (triangle=red,
+square=blue, hexagon=green, octagon=purple). For uniform tilings the polygon
+type maps directly to the neighbor count; for Archimedean tilings the actual
+tile_type metadata is used so boundary cells are colored correctly. A sample
+cell and its neighbors are highlighted in gold, and the title shows cell
+count, neighbor count range, and tiling name.
+
+Output files are saved to the current working directory:
+  - tiling_square.png
+  - tiling_hex.png
+  - tiling_triangle.png
+  - tiling_3464.png
+  - tiling_488.png
+  - tiling_comparison.png (all five side-by-side)
+"""
+
+import math
+import sys
+import os
+
+# Add the v1_1 directory to sys.path so multigrid imports resolve
+_V1_1_DIR = os.path.dirname(os.path.abspath(__file__))
+if _V1_1_DIR not in sys.path:
+    sys.path.insert(0, _V1_1_DIR)
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.patches import Polygon as MplPolygon, Rectangle, RegularPolygon
+
+from multigrid.tilings import (
+    SquareTiling,
+    HexTiling,
+    TriangleTiling,
+    Archimedean3464Tiling,
+    Archimedean488Tiling,
+)
+
+
+# ---------------------------------------------------------------------------
+# Color palette: maps neighbor count to a distinct color
+# ---------------------------------------------------------------------------
+NEIGHBOR_COLORS = {
+    3: "#E74C3C",   # red for triangles (3 neighbors)
+    4: "#3498DB",   # blue for squares (4 neighbors)
+    6: "#2ECC71",   # green for hexagons (6 neighbors)
+    8: "#9B59B6",   # purple for octagons (8 neighbors)
+}
+
+# Colors keyed by tile_type name (used for Archimedean tilings where
+# boundary cells may have fewer neighbors than their polygon's edge count)
+TILE_TYPE_COLORS = {
+    "triangle": "#E74C3C",
+    "square":   "#3498DB",
+    "hexagon":  "#2ECC71",
+    "octagon":  "#9B59B6",
+}
+
+# Fallback gradient for any unexpected neighbor counts
+_FALLBACK_CMAP = plt.cm.viridis
+
+
+def _color_for_neighbor_count(count, min_n, max_n):
+    """Return a face color based on the number of neighbors a cell has."""
+    if count in NEIGHBOR_COLORS:
+        return NEIGHBOR_COLORS[count]
+    # Fallback: map linearly into viridis
+    if max_n == min_n:
+        return _FALLBACK_CMAP(0.5)
+    t = (count - min_n) / (max_n - min_n)
+    return _FALLBACK_CMAP(t)
+
+
+def _color_for_tile_type(cell):
+    """Return a face color based on the tile_type stored in tiling_coords.
+
+    Falls back to neighbor-count coloring if tile_type is not available.
+    """
+    tc = cell.tiling_coords
+    if isinstance(tc, dict) and "tile_type" in tc:
+        tile_type = tc["tile_type"]
+        if tile_type in TILE_TYPE_COLORS:
+            return TILE_TYPE_COLORS[tile_type]
+    return _color_for_neighbor_count(len(cell.neighbors), 0, 8)
+
+
+# ---------------------------------------------------------------------------
+# Per-tiling drawing helpers
+# ---------------------------------------------------------------------------
+
+def _draw_square_cell(ax, cell, cell_width, cell_height, facecolor, edgecolor,
+                      linewidth=0.5, alpha=0.85):
+    """Draw a single square cell as a Rectangle patch."""
+    cx, cy = cell.position_hint
+    rect = Rectangle(
+        (cx - cell_width / 2, cy - cell_height / 2),
+        cell_width,
+        cell_height,
+        linewidth=linewidth,
+        edgecolor=edgecolor,
+        facecolor=facecolor,
+        alpha=alpha,
+    )
+    ax.add_patch(rect)
+
+
+def _draw_hex_cell(ax, cell, hex_size, facecolor, edgecolor,
+                   linewidth=0.5, alpha=0.85):
+    """Draw a single hexagonal cell as a RegularPolygon (pointy-top)."""
+    cx, cy = cell.position_hint
+    hex_patch = RegularPolygon(
+        (cx, cy),
+        numVertices=6,
+        radius=hex_size,
+        orientation=math.pi / 6,  # pointy-top
+        linewidth=linewidth,
+        edgecolor=edgecolor,
+        facecolor=facecolor,
+        alpha=alpha,
+    )
+    ax.add_patch(hex_patch)
+
+
+def _draw_triangle_cell(ax, cell, facecolor, edgecolor,
+                        linewidth=0.5, alpha=0.85):
+    """Draw a single triangle cell using its hex_center and tri_idx."""
+    tc = cell.tiling_coords
+    hex_center = tc["hex_center"]
+    tri_idx = tc["tri_idx"]
+    hex_size = tc["hex_size"]
+
+    # Apex vertex of the triangle is at the hex vertex
+    angle_apex = math.pi / 2 - tri_idx * math.pi / 3
+    apex_x = hex_center[0] + hex_size * math.cos(angle_apex)
+    apex_y = hex_center[1] - hex_size * math.sin(angle_apex)
+
+    # Two base vertices are the adjacent hex vertices
+    angle_left = math.pi / 2 - ((tri_idx + 1) % 6) * math.pi / 3
+    left_x = hex_center[0] + hex_size * math.cos(angle_left)
+    left_y = hex_center[1] - hex_size * math.sin(angle_left)
+
+    angle_right = math.pi / 2 - ((tri_idx - 1) % 6) * math.pi / 3
+    right_x = hex_center[0] + hex_size * math.cos(angle_right)
+    right_y = hex_center[1] - hex_size * math.sin(angle_right)
+
+    # The triangle spans from the hex center to two adjacent hex vertices.
+    # Actually the triangle is: center -> vertex[tri_idx] edge to vertex[tri_idx+1].
+    # But the tiling splits each hexagon into 6 triangles from center to each edge.
+    # So the vertices are: hex_center, hex_vertex[tri_idx], hex_vertex[(tri_idx+1)%6].
+    v0 = hex_center
+    v1 = (apex_x, apex_y)
+    v2 = (left_x, left_y)
+
+    tri_patch = MplPolygon(
+        [v0, v1, v2],
+        closed=True,
+        linewidth=linewidth,
+        edgecolor=edgecolor,
+        facecolor=facecolor,
+        alpha=alpha,
+    )
+    ax.add_patch(tri_patch)
+
+
+def _draw_archimedean_cell(ax, cell, facecolor, edgecolor,
+                           linewidth=0.5, alpha=0.85):
+    """Draw an Archimedean tiling cell using its pre-computed vertices."""
+    verts = cell.tiling_coords["vertices"]
+    poly = MplPolygon(
+        verts,
+        closed=True,
+        linewidth=linewidth,
+        edgecolor=edgecolor,
+        facecolor=facecolor,
+        alpha=alpha,
+    )
+    ax.add_patch(poly)
+
+
+# ---------------------------------------------------------------------------
+# Tiling rendering
+# ---------------------------------------------------------------------------
+
+def _pick_sample_cell(cells):
+    """Pick a sample cell that is well-connected (not on the boundary).
+
+    Prefers cells near the center of the layout that have a high neighbor count
+    relative to the maximum possible for the tiling.
+    """
+    if not cells:
+        return None
+
+    # Compute centroid of all cell positions
+    xs = [c.position_hint[0] for c in cells.values()]
+    ys = [c.position_hint[1] for c in cells.values()]
+    cx = sum(xs) / len(xs)
+    cy = sum(ys) / len(ys)
+
+    # Find the maximum neighbor count across all cells
+    max_neighbors = max(len(c.neighbors) for c in cells.values())
+
+    # Score each cell: prefer central cells with many neighbors
+    best_id = None
+    best_score = float("inf")
+    for cell_id, cell in cells.items():
+        dist_to_center = (cell.position_hint[0] - cx) ** 2 + (cell.position_hint[1] - cy) ** 2
+        # Penalize cells with fewer neighbors (boundary cells)
+        neighbor_penalty = (max_neighbors - len(cell.neighbors)) * 0.5
+        score = dist_to_center + neighbor_penalty
+        if score < best_score:
+            best_score = score
+            best_id = cell_id
+
+    return best_id
+
+
+def _compute_stats(cells):
+    """Compute cell count and neighbor count range."""
+    if not cells:
+        return 0, 0, 0
+    neighbor_counts = [len(c.neighbors) for c in cells.values()]
+    return len(cells), min(neighbor_counts), max(neighbor_counts)
+
+
+def render_square_tiling(ax, title_extra=""):
+    """Render the square tiling onto the given axes."""
+    tiling = SquareTiling()
+    width, height = 8, 6
+    cells = tiling.generate_graph(width, height, seed=0)
+
+    cell_count, min_n, max_n = _compute_stats(cells)
+    cell_w = 1.0 / width
+    cell_h = 1.0 / height
+
+    sample_id = _pick_sample_cell(cells)
+    sample_neighbors = set(cells[sample_id].neighbors.values()) if sample_id else set()
+
+    for cell_id, cell in cells.items():
+        n_count = len(cell.neighbors)
+        if cell_id == sample_id:
+            fc = "#F39C12"  # gold for sample cell
+            ec = "#E67E22"
+            lw = 2.0
+        elif cell_id in sample_neighbors:
+            fc = "#F5B041"  # light gold for neighbors
+            ec = "#E67E22"
+            lw = 1.5
+        else:
+            fc = _color_for_neighbor_count(n_count, min_n, max_n)
+            ec = "#2C3E50"
+            lw = 0.5
+        _draw_square_cell(ax, cell, cell_w * 0.95, cell_h * 0.95, fc, ec, lw)
+
+    ax.set_xlim(-0.02, 1.02)
+    ax.set_ylim(-0.02, 1.02)
+    ax.set_aspect("equal")
+    ax.invert_yaxis()
+    ax.set_title(
+        f"Square Tiling (4-connected){title_extra}\n"
+        f"{cell_count} cells, {min_n}-{max_n} neighbors per cell",
+        fontsize=10, fontweight="bold",
+    )
+    ax.set_xticks([])
+    ax.set_yticks([])
+
+
+def render_hex_tiling(ax, title_extra=""):
+    """Render the hexagonal tiling onto the given axes."""
+    tiling = HexTiling()
+    width, height = 6, 5
+    cells = tiling.generate_graph(width, height, seed=0)
+
+    cell_count, min_n, max_n = _compute_stats(cells)
+
+    # Compute hex size for rendering (same logic as in HexTiling)
+    height_spacing = (height - 1) if height > 1 else 1
+    size_from_w = 0.95 / ((width + 0.5) * math.sqrt(3)) if width > 0 else 0.1
+    size_from_h = 0.95 / (height_spacing * 1.5) if height_spacing > 0 else 0.1
+    hex_size = min(size_from_w, size_from_h)
+
+    sample_id = _pick_sample_cell(cells)
+    sample_neighbors = set(cells[sample_id].neighbors.values()) if sample_id else set()
+
+    for cell_id, cell in cells.items():
+        n_count = len(cell.neighbors)
+        if cell_id == sample_id:
+            fc = "#F39C12"
+            ec = "#E67E22"
+            lw = 2.0
+        elif cell_id in sample_neighbors:
+            fc = "#F5B041"
+            ec = "#E67E22"
+            lw = 1.5
+        else:
+            fc = _color_for_neighbor_count(n_count, min_n, max_n)
+            ec = "#2C3E50"
+            lw = 0.5
+        _draw_hex_cell(ax, cell, hex_size, fc, ec, lw)
+
+    ax.set_xlim(-0.02, 1.02)
+    ax.set_ylim(-0.02, 1.02)
+    ax.set_aspect("equal")
+    ax.set_title(
+        f"Hexagonal Tiling (6-connected){title_extra}\n"
+        f"{cell_count} cells, {min_n}-{max_n} neighbors per cell",
+        fontsize=10, fontweight="bold",
+    )
+    ax.set_xticks([])
+    ax.set_yticks([])
+
+
+def render_triangle_tiling(ax, title_extra=""):
+    """Render the triangular tiling onto the given axes."""
+    tiling = TriangleTiling()
+    width, height = 4, 3
+    cells = tiling.generate_graph(width, height, seed=0)
+
+    cell_count, min_n, max_n = _compute_stats(cells)
+
+    sample_id = _pick_sample_cell(cells)
+    sample_neighbors = set(cells[sample_id].neighbors.values()) if sample_id else set()
+
+    for cell_id, cell in cells.items():
+        n_count = len(cell.neighbors)
+        if cell_id == sample_id:
+            fc = "#F39C12"
+            ec = "#E67E22"
+            lw = 2.0
+        elif cell_id in sample_neighbors:
+            fc = "#F5B041"
+            ec = "#E67E22"
+            lw = 1.5
+        else:
+            fc = _color_for_neighbor_count(n_count, min_n, max_n)
+            ec = "#2C3E50"
+            lw = 0.5
+        _draw_triangle_cell(ax, cell, fc, ec, lw)
+
+    ax.set_xlim(-0.05, 1.05)
+    ax.set_ylim(-0.05, 1.05)
+    ax.set_aspect("equal")
+    ax.set_title(
+        f"Triangular Tiling (3-connected){title_extra}\n"
+        f"{cell_count} cells, {min_n}-{max_n} neighbors per cell",
+        fontsize=10, fontweight="bold",
+    )
+    ax.set_xticks([])
+    ax.set_yticks([])
+
+
+def render_3464_tiling(ax, title_extra=""):
+    """Render the 3-4-6-4 rhombitrihexagonal tiling onto the given axes."""
+    tiling = Archimedean3464Tiling()
+    width, height = 3, 3
+    cells = tiling.generate_graph(width, height, seed=0)
+
+    cell_count, min_n, max_n = _compute_stats(cells)
+
+    sample_id = _pick_sample_cell(cells)
+    sample_neighbors = set(cells[sample_id].neighbors.values()) if sample_id else set()
+
+    for cell_id, cell in cells.items():
+        if cell_id == sample_id:
+            fc = "#F39C12"
+            ec = "#E67E22"
+            lw = 2.0
+        elif cell_id in sample_neighbors:
+            fc = "#F5B041"
+            ec = "#E67E22"
+            lw = 1.5
+        else:
+            fc = _color_for_tile_type(cell)
+            ec = "#2C3E50"
+            lw = 0.5
+        _draw_archimedean_cell(ax, cell, fc, ec, lw)
+
+    ax.set_xlim(-0.02, 1.02)
+    ax.set_ylim(-0.02, 1.02)
+    ax.set_aspect("equal")
+    ax.set_title(
+        f"3-4-6-4 Rhombitrihexagonal{title_extra}\n"
+        f"{cell_count} cells, {min_n}-{max_n} neighbors per cell",
+        fontsize=10, fontweight="bold",
+    )
+    ax.set_xticks([])
+    ax.set_yticks([])
+
+
+def render_488_tiling(ax, title_extra=""):
+    """Render the 4-8-8 truncated square tiling onto the given axes."""
+    tiling = Archimedean488Tiling()
+    width, height = 5, 5
+    cells = tiling.generate_graph(width, height, seed=0)
+
+    cell_count, min_n, max_n = _compute_stats(cells)
+
+    sample_id = _pick_sample_cell(cells)
+    sample_neighbors = set(cells[sample_id].neighbors.values()) if sample_id else set()
+
+    for cell_id, cell in cells.items():
+        if cell_id == sample_id:
+            fc = "#F39C12"
+            ec = "#E67E22"
+            lw = 2.0
+        elif cell_id in sample_neighbors:
+            fc = "#F5B041"
+            ec = "#E67E22"
+            lw = 1.5
+        else:
+            fc = _color_for_tile_type(cell)
+            ec = "#2C3E50"
+            lw = 0.5
+        _draw_archimedean_cell(ax, cell, fc, ec, lw)
+
+    ax.set_xlim(-0.02, 1.02)
+    ax.set_ylim(-0.02, 1.02)
+    ax.set_aspect("equal")
+    ax.set_title(
+        f"4-8-8 Truncated Square{title_extra}\n"
+        f"{cell_count} cells, {min_n}-{max_n} neighbors per cell",
+        fontsize=10, fontweight="bold",
+    )
+    ax.set_xticks([])
+    ax.set_yticks([])
+
+
+# ---------------------------------------------------------------------------
+# Legend
+# ---------------------------------------------------------------------------
+
+def _add_legend(fig):
+    """Add a shared legend showing the color-to-polygon-type mapping."""
+    legend_items = [
+        mpatches.Patch(facecolor=NEIGHBOR_COLORS[3], edgecolor="#2C3E50",
+                       label="Triangle (3 neighbors)"),
+        mpatches.Patch(facecolor=NEIGHBOR_COLORS[4], edgecolor="#2C3E50",
+                       label="Square (4 neighbors)"),
+        mpatches.Patch(facecolor=NEIGHBOR_COLORS[6], edgecolor="#2C3E50",
+                       label="Hexagon (6 neighbors)"),
+        mpatches.Patch(facecolor=NEIGHBOR_COLORS[8], edgecolor="#2C3E50",
+                       label="Octagon (8 neighbors)"),
+        mpatches.Patch(facecolor="#F39C12", edgecolor="#E67E22",
+                       label="Sample cell (highlighted)"),
+        mpatches.Patch(facecolor="#F5B041", edgecolor="#E67E22",
+                       label="Neighbors of sample"),
+    ]
+    fig.legend(
+        handles=legend_items,
+        loc="lower center",
+        ncol=3,
+        fontsize=8,
+        frameon=True,
+        fancybox=True,
+        shadow=False,
+        borderpad=0.8,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Individual image generation
+# ---------------------------------------------------------------------------
+
+def generate_individual_images():
+    """Generate a separate PNG for each tiling type."""
+    renderers = [
+        ("tiling_square.png", render_square_tiling),
+        ("tiling_hex.png", render_hex_tiling),
+        ("tiling_triangle.png", render_triangle_tiling),
+        ("tiling_3464.png", render_3464_tiling),
+        ("tiling_488.png", render_488_tiling),
+    ]
+
+    for filename, render_fn in renderers:
+        fig, ax = plt.subplots(1, 1, figsize=(7, 7))
+        render_fn(ax)
+        _add_legend(fig)
+        fig.tight_layout(rect=[0, 0.08, 1, 1])
+        filepath = os.path.join(_V1_1_DIR, filename)
+        fig.savefig(filepath, dpi=150, bbox_inches="tight",
+                    facecolor="white", edgecolor="none")
+        plt.close(fig)
+        print(f"Saved {filepath}")
+
+
+# ---------------------------------------------------------------------------
+# Comparison image (all five side-by-side)
+# ---------------------------------------------------------------------------
+
+def generate_comparison_image():
+    """Generate a single PNG showing all five tilings side-by-side."""
+    fig, axes = plt.subplots(1, 5, figsize=(30, 7))
+
+    render_square_tiling(axes[0])
+    render_hex_tiling(axes[1])
+    render_triangle_tiling(axes[2])
+    render_3464_tiling(axes[3])
+    render_488_tiling(axes[4])
+
+    fig.suptitle(
+        "MultiGrid Tiling Types -- Cells colored by polygon type",
+        fontsize=14,
+        fontweight="bold",
+        y=0.98,
+    )
+
+    _add_legend(fig)
+    fig.tight_layout(rect=[0, 0.06, 1, 0.94])
+
+    filepath = os.path.join(_V1_1_DIR, "tiling_comparison.png")
+    fig.savefig(filepath, dpi=150, bbox_inches="tight",
+                facecolor="white", edgecolor="none")
+    plt.close(fig)
+    print(f"Saved {filepath}")
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def main():
+    """Generate all tiling visualizations."""
+    print("Generating individual tiling images...")
+    generate_individual_images()
+    print()
+    print("Generating comparison image...")
+    generate_comparison_image()
+    print()
+    print("Done. All images saved to:", _V1_1_DIR)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/v1_1/visualize_grid.py b/src/v1_1/visualize_grid.py
new file mode 100644
index 00000000..e2b742be
--- /dev/null
+++ b/src/v1_1/visualize_grid.py
@@ -0,0 +1,368 @@
+#!/usr/bin/env python3
+"""
+Visualization script for MultiGrid environments.
+
+This script creates a simple grid environment and visualizes it using matplotlib.
+"""
+
+import sys
+import os
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.patches import Polygon, Circle, Rectangle
+import matplotlib.patches as mpatches
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+from multigrid.env import MultiGridEnv, TilingRegistry
+from multigrid.tilings import SquareTiling, HexTiling, TriangleTiling
+from multigrid.agent import Action
+
+
+def visualize_grid(tiling_name="square", width=10, height=10):
+    """
+    Visualize a grid with the specified tiling.
+
+    Args:
+        tiling_name: Type of tiling ("square", "hex", or "triangle")
+        width: Grid width in cells
+        height: Grid height in cells
+    """
+    # Create tiling
+    tiling = TilingRegistry.get(tiling_name)
+    cells = tiling.generate_graph(width, height, seed=0)
+
+    # Create figure
+    fig, ax = plt.subplots(1, 1, figsize=(12, 12))
+    ax.set_aspect('equal')
+    ax.set_xlim(-0.1, 1.1)
+    ax.set_ylim(-0.1, 1.1)
+    ax.set_title(f"{tiling_name.capitalize()} Grid ({width}x{height})")
+
+    # Draw cells
+    for cell_id, cell in cells.items():
+        x, y = cell.position_hint
+
+        # Draw cell based on tiling type
+        if tiling_name == "square":
+            # Draw square cell
+            cell_size = 1.0 / width
+            rect = Rectangle(
+                (x - cell_size/2, y - cell_size/2),
+                cell_size, cell_size,
+                facecolor='lightblue',
+                edgecolor='darkblue',
+                linewidth=0.5
+            )
+            ax.add_patch(rect)
+
+        elif tiling_name == "hex":
+            # Draw hexagon cell with proper sizing to match HexTiling coordinate system
+            from matplotlib.patches import RegularPolygon
+
+            # Calculate hex size matching HexTiling._axial_to_normalized()
+            width_spacing = (width - 1) if width > 1 else 1
+            height_spacing = (height - 1) if height > 1 else 1
+            size_from_width = 0.95 / ((width + 0.5) * math.sqrt(3)) if width > 0 else 0.1
+            size_from_height = 0.95 / (height_spacing * 1.5) if height_spacing > 0 else 0.1
+            size = min(size_from_width, size_from_height)
+
+            hexagon = RegularPolygon(
+                (x, y),
+                numVertices=6,
+                radius=size,  # Full size for edge-to-edge tiling
+                orientation=math.pi / 2,  # Point top
+                facecolor='lightblue',
+                edgecolor='darkblue',
+                linewidth=0.5
+            )
+            ax.add_patch(hexagon)
+
+        elif tiling_name == "triangle":
+            # Triangles are subdivisions of hexagons
+            # Parse triangle ID: tri_hexcol_hexrow_triidx
+            parts = cell_id.split("_")
+            if len(parts) == 4:
+                from multigrid.tilings.hex import OffsetCoord, offset_to_axial
+                _, hex_col, hex_row, tri_idx = parts
+                tri_idx = int(tri_idx)
+                hex_col = int(hex_col)
+                hex_row = int(hex_row)
+
+                # Get hex center position
+                offset = OffsetCoord(hex_col, hex_row)
+                axial = offset_to_axial(offset)
+
+                # Calculate hex size (same as HexTiling)
+                width_spacing = (width - 1) if width > 1 else 1
+                height_spacing = (height - 1) if height > 1 else 1
+                size_from_width = 0.95 / ((width + 0.5) * math.sqrt(3))
+                size_from_height = 0.95 / (height_spacing * 1.5)
+                hex_size = min(size_from_width, size_from_height)
+
+                # Calculate hex center in normalized coordinates
+                col_pos = hex_col * math.sqrt(3) * hex_size
+                row_pos = hex_row * 1.5 * hex_size
+                if hex_row % 2 == 1:
+                    col_pos += math.sqrt(3) / 2 * hex_size
+
+                grid_width = (width + 0.5) * math.sqrt(3) * hex_size
+                grid_height = (height - 0.5) * 1.5 * hex_size
+                x_offset = (1.0 - grid_width) / 2
+                y_offset = (1.0 - grid_height) / 2
+
+                hex_center_x = col_pos + x_offset
+                hex_center_y = row_pos + y_offset
+
+                # Calculate the 3 vertices of this triangle
+                # Each triangle has apex at a hex vertex and base edges to adjacent vertices
+                angle_apex = math.pi / 2 - tri_idx * math.pi / 3
+                angle_base1 = math.pi / 2 - ((tri_idx - 1) % 6) * math.pi / 3
+                angle_base2 = math.pi / 2 - ((tri_idx + 1) % 6) * math.pi / 3
+
+                # Apex vertex
+                apex_x = hex_center_x + hex_size * math.cos(angle_apex)
+                apex_y = hex_center_y - hex_size * math.sin(angle_apex)
+
+                # Base vertices (adjacent hex vertices)
+                base1_x = hex_center_x + hex_size * math.cos(angle_base1)
+                base1_y = hex_center_y - hex_size * math.sin(angle_base1)
+
+                base2_x = hex_center_x + hex_size * math.cos(angle_base2)
+                base2_y = hex_center_y - hex_size * math.sin(angle_base2)
+
+                vertices = [
+                    (apex_x, apex_y),
+                    (base1_x, base1_y),
+                    (base2_x, base2_y)
+                ]
+
+                triangle = Polygon(
+                    vertices,
+                    facecolor='lightblue',
+                    edgecolor='darkblue',
+                    linewidth=0.5
+                )
+                ax.add_patch(triangle)
+
+        # Draw cell center point
+        ax.plot(x, y, 'k.', markersize=1)
+
+    # Add legend
+    legend_elements = [
+        mpatches.Patch(facecolor='none', edgecolor='gray', label=f'{len(cells)} cells'),
+        mpatches.Patch(facecolor='none', edgecolor='blue', label=f'{len(tiling.directions)} directions per cell')
+    ]
+    ax.legend(handles=legend_elements, loc='upper right')
+
+    plt.tight_layout()
+    plt.savefig(f'grid_visualization_{tiling_name}.png', dpi=150, bbox_inches='tight')
+    print(f"Saved visualization to grid_visualization_{tiling_name}.png")
+    plt.close()
+
+
+def visualize_environment():
+    """
+    Visualize a complete environment with agent and objects.
+    """
+    # Create a simple task spec
+    task_spec = {
+        "task_id": "demo_001",
+        "seed": 42,
+        "scene": {
+            "bounds": {"width": 1.0, "height": 1.0},
+            "objects": [
+                {
+                    "id": "cube_red",
+                    "type": "movable",
+                    "color": "red",
+                    "position": {"x": 0.7, "y": 0.7},
+                    "size": 0.1
+                },
+                {
+                    "id": "cube_green",
+                    "type": "movable",
+                    "color": "green",
+                    "position": {"x": 0.3, "y": 0.7},
+                    "size": 0.1
+                }
+            ],
+            "agent": {
+                "position": {"x": 0.2, "y": 0.2},
+                "facing": 0
+            }
+        },
+        "goal": {
+            "predicate": "object_in_zone",
+            "object_id": "cube_red",
+            "zone_id": "zone_blue"
+        },
+        "limits": {"max_steps": 100},
+        "tiling": {"type": "square", "grid_size": {"width": 10, "height": 10}}
+    }
+
+    # Create environment
+    env = MultiGridEnv(task_spec, tiling="square")
+    obs, info = env.reset(seed=42)
+
+    # Create figure
+    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
+
+    tiling_types = ["square", "hex", "triangle"]
+
+    for idx, tiling_name in enumerate(tiling_types):
+        ax = axes[idx]
+        ax.set_aspect('equal')
+        ax.set_xlim(-0.1, 1.1)
+        ax.set_ylim(-0.1, 1.1)
+        ax.set_title(f"{tiling_name.capitalize()} Tiling (10x10)")
+
+        # Create environment with this tiling
+        task_spec["tiling"]["type"] = tiling_name
+        env = MultiGridEnv(task_spec, tiling=tiling_name)
+        obs, info = env.reset(seed=42)
+
+        # Draw grid
+        import math
+        from matplotlib.patches import RegularPolygon
+        tiling = env.tiling
+        cell_size = 1.0 / 10
+
+        # Draw all cells
+        for cell_id, cell in tiling.cells.items():
+            x, y = cell.position_hint
+
+            if tiling_name == "square":
+                rect = Rectangle(
+                    (x - cell_size/2, y - cell_size/2),
+                    cell_size, cell_size,
+                    facecolor='lightgray',
+                    edgecolor='gray',
+                    linewidth=0.3
+                )
+                ax.add_patch(rect)
+            elif tiling_name == "hex":
+                # Calculate proper hex size matching HexTiling coordinate system
+                width_spacing = 9  # 10 - 1
+                height_spacing = 9  # 10 - 1
+                size_from_width = 0.95 / ((10 + 0.5) * math.sqrt(3))
+                size_from_height = 0.95 / (height_spacing * 1.5)
+                size = min(size_from_width, size_from_height)
+                hexagon = RegularPolygon(
+                    (x, y),
+                    numVertices=6,
+                    radius=size,  # Full size for edge-to-edge
+                    orientation=math.pi / 2,
+                    facecolor='lightgray',
+                    edgecolor='gray',
+                    linewidth=0.3
+                )
+                ax.add_patch(hexagon)
+            elif tiling_name == "triangle":
+                # Triangles are subdivisions of hexagons
+                # Parse triangle ID: tri_hexcol_hexrow_triidx
+                parts = cell_id.split("_")
+                if len(parts) == 4:
+                    from multigrid.tilings.hex import OffsetCoord, offset_to_axial
+                    _, hex_col, hex_row, tri_idx = parts
+                    tri_idx = int(tri_idx)
+                    hex_col = int(hex_col)
+                    hex_row = int(hex_row)
+
+                    # Get hex center position
+                    offset = OffsetCoord(hex_col, hex_row)
+                    axial = offset_to_axial(offset)
+
+                    # Calculate hex size (same as HexTiling)
+                    width_spacing = 9  # 10 - 1
+                    height_spacing = 9  # 10 - 1
+                    size_from_width = 0.95 / ((10 + 0.5) * math.sqrt(3))
+                    size_from_height = 0.95 / (height_spacing * 1.5)
+                    hex_size = min(size_from_width, size_from_height)
+
+                    # Calculate hex center in normalized coordinates
+                    col_pos = hex_col * math.sqrt(3) * hex_size
+                    row_pos = hex_row * 1.5 * hex_size
+                    if hex_row % 2 == 1:
+                        col_pos += math.sqrt(3) / 2 * hex_size
+
+                    grid_width = (10 + 0.5) * math.sqrt(3) * hex_size
+                    grid_height = (10 - 0.5) * 1.5 * hex_size
+                    x_offset = (1.0 - grid_width) / 2
+                    y_offset = (1.0 - grid_height) / 2
+
+                    hex_center_x = col_pos + x_offset
+                    hex_center_y = row_pos + y_offset
+
+                    # Calculate the 3 vertices of this triangle
+                    angle_apex = math.pi / 2 - tri_idx * math.pi / 3
+                    angle_base1 = math.pi / 2 - ((tri_idx - 1) % 6) * math.pi / 3
+                    angle_base2 = math.pi / 2 - ((tri_idx + 1) % 6) * math.pi / 3
+
+                    # Apex vertex
+                    apex_x = hex_center_x + hex_size * math.cos(angle_apex)
+                    apex_y = hex_center_y - hex_size * math.sin(angle_apex)
+
+                    # Base vertices (adjacent hex vertices)
+                    base1_x = hex_center_x + hex_size * math.cos(angle_base1)
+                    base1_y = hex_center_y - hex_size * math.sin(angle_base1)
+
+                    base2_x = hex_center_x + hex_size * math.cos(angle_base2)
+                    base2_y = hex_center_y - hex_size * math.sin(angle_base2)
+
+                    vertices = [
+                        (apex_x, apex_y),
+                        (base1_x, base1_y),
+                        (base2_x, base2_y)
+                    ]
+
+                    triangle = Polygon(
+                        vertices,
+                        facecolor='lightgray',
+                        edgecolor='gray',
+                        linewidth=0.3
+                    )
+                    ax.add_patch(triangle)
+
+        # Draw agent
+        agent_x, agent_y = tiling.cell_to_canonical(env.state.agent.cell_id)
+        ax.plot(agent_x, agent_y, 'bo', markersize=15, label='Agent')
+
+        # Draw objects
+        for obj in env.state.objects.values():
+            if obj.cell_id:
+                obj_x, obj_y = tiling.cell_to_canonical(obj.cell_id)
+                color_map = {'red': 'r', 'green': 'g', 'blue': 'b'}
+                ax.plot(obj_x, obj_y, f'{color_map.get(obj.color, "k")}s', markersize=10, label=f'{obj.color} cube')
+
+        ax.legend(loc='upper right', fontsize=8)
+        ax.grid(True, alpha=0.2)
+
+    plt.tight_layout()
+    plt.savefig('environment_comparison.png', dpi=150, bbox_inches='tight')
+    print("Saved environment comparison to environment_comparison.png")
+    plt.close()
+
+
+if __name__ == "__main__":
+    print("MultiGrid Visualization Script")
+    print("=" * 50)
+
+    # Visualize different grid types
+    for tiling_name in ["square", "hex", "triangle"]:
+        print(f"\nGenerating {tiling_name} grid visualization...")
+        visualize_grid(tiling_name, width=10, height=10)
+
+    # Visualize complete environments
+    print("\nGenerating environment comparison...")
+    visualize_environment()
+
+    print("\n" + "=" * 50)
+    print("All visualizations generated successfully!")
+    print("\nGenerated files:")
+    print("  - grid_visualization_square.png")
+    print("  - grid_visualization_hex.png")
+    print("  - grid_visualization_triangle.png")
+    print("  - environment_comparison.png")
diff --git a/src/v1_1/visualize_grids_proper.py b/src/v1_1/visualize_grids_proper.py
new file mode 100644
index 00000000..faa93d25
--- /dev/null
+++ b/src/v1_1/visualize_grids_proper.py
@@ -0,0 +1,315 @@
+#!/usr/bin/env python3
+"""
+Proper grid visualization showing actual tiled patterns.
+"""
+
+import sys
+import os
+import math
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.patches import Polygon, Circle, RegularPolygon
+import numpy as np
+
+# Add parent directory to path
+sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
+
+from multigrid.tilings import SquareTiling, HexTiling, TriangleTiling
+
+
+def visualize_square_grid(width=10, height=10):
+    """Visualize square grid with proper tiling."""
+    tiling = SquareTiling()
+    tiling.generate_graph(width, height, seed=0)
+
+    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
+    ax.set_aspect('equal')
+    ax.set_xlim(-0.05, 1.05)
+    ax.set_ylim(-0.05, 1.05)
+    ax.set_title(f"Square Tiling ({width}×{height} cells, 4 directions per cell)", fontsize=14)
+
+    cell_size = 1.0 / width
+
+    # Draw all cells
+    for cell_id, cell in tiling.cells.items():
+        x_norm, y_norm = cell.position_hint
+
+        # Draw square
+        square = mpatches.Rectangle(
+            (x_norm - cell_size/2, y_norm - cell_size/2),
+            cell_size, cell_size,
+            fill=True,
+            facecolor='lightblue',
+            edgecolor='darkblue',
+            linewidth=0.5
+        )
+        ax.add_patch(square)
+
+        # Draw cell center
+        ax.plot(x_norm, y_norm, 'k.', markersize=1)
+
+    # Highlight a sample cell and its neighbors
+    sample_cell_id = f"sq_5_5"
+    if sample_cell_id in tiling.cells:
+        cell = tiling.cells[sample_cell_id]
+        x, y = cell.position_hint
+
+        # Highlight center cell
+        square = mpatches.Rectangle(
+            (x - cell_size/2, y - cell_size/2),
+            cell_size, cell_size,
+            fill=True,
+            facecolor='yellow',
+            edgecolor='red',
+            linewidth=2
+        )
+        ax.add_patch(square)
+
+        # Highlight neighbors
+        for direction, neighbor_id in cell.neighbors.items():
+            neighbor = tiling.cells[neighbor_id]
+            nx, ny = neighbor.position_hint
+            square = mpatches.Rectangle(
+                (nx - cell_size/2, ny - cell_size/2),
+                cell_size, cell_size,
+                fill=True,
+                facecolor='lightgreen',
+                edgecolor='green',
+                linewidth=1.5
+            )
+            ax.add_patch(square)
+
+    plt.savefig('square_grid_proper.png', dpi=150, bbox_inches='tight')
+    print("Saved square_grid_proper.png")
+    plt.close()
+
+
+def visualize_hex_grid(width=10, height=10):
+    """Visualize hexagonal grid with proper tiling."""
+    tiling = HexTiling()
+    tiling.generate_graph(width, height, seed=0)
+
+    fig, ax = plt.subplots(1, 1, figsize=(12, 10))
+    ax.set_aspect('equal')
+    ax.set_xlim(-0.05, 1.05)
+    ax.set_ylim(-0.05, 1.05)
+    ax.set_title(f"Hexagonal Tiling ({width}×{height} cells, 6 directions per cell)", fontsize=14)
+
+    # Calculate hex size based on grid dimensions
+    hex_width_units = width * math.sqrt(3)
+    hex_height_units = height * 1.5 + 0.5
+    size = min(1.0 / hex_width_units, 1.0 / hex_height_units)
+
+    # Draw all hexagons
+    for cell_id, cell in tiling.cells.items():
+        x_norm, y_norm = cell.position_hint
+
+        # Create hexagon vertices
+        hexagon = RegularPolygon(
+            (x_norm, y_norm),
+            numVertices=6,
+            radius=size * 0.98,  # Slightly smaller to see edges
+            orientation=math.pi / 2,  # Point top
+            facecolor='lightblue',
+            edgecolor='darkblue',
+            linewidth=0.5
+        )
+        ax.add_patch(hexagon)
+
+        # Draw cell center
+        ax.plot(x_norm, y_norm, 'k.', markersize=1)
+
+    # Highlight a sample cell in the middle and its neighbors
+    mid_cells = [c for c in tiling.cells.values() if 0.4 < c.position_hint[0] < 0.6 and 0.4 < c.position_hint[1] < 0.6]
+    if mid_cells:
+        cell = mid_cells[0]
+        x, y = cell.position_hint
+
+        # Highlight center cell
+        hexagon = RegularPolygon(
+            (x, y),
+            numVertices=6,
+            radius=size * 0.98,
+            orientation=math.pi / 2,
+            facecolor='yellow',
+            edgecolor='red',
+            linewidth=2
+        )
+        ax.add_patch(hexagon)
+
+        # Highlight neighbors
+        for direction, neighbor_id in cell.neighbors.items():
+            neighbor = tiling.cells[neighbor_id]
+            nx, ny = neighbor.position_hint
+            hexagon = RegularPolygon(
+                (nx, ny),
+                numVertices=6,
+                radius=size * 0.98,
+                orientation=math.pi / 2,
+                facecolor='lightgreen',
+                edgecolor='green',
+                linewidth=1.5
+            )
+            ax.add_patch(hexagon)
+
+    plt.savefig('hex_grid_proper.png', dpi=150, bbox_inches='tight')
+    print("Saved hex_grid_proper.png")
+    plt.close()
+
+
+def visualize_triangle_grid(width=10, height=10):
+    """Visualize triangular grid with proper tiling."""
+    tiling = TriangleTiling()
+    tiling.generate_graph(width, height, seed=0)
+
+    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
+    ax.set_aspect('equal')
+    ax.set_xlim(-0.05, 1.05)
+    ax.set_ylim(-0.05, 1.05)
+    ax.set_title(f"Triangular Tiling ({width}×{height} cells, 3 edges per cell)", fontsize=14)
+
+    cell_size = 1.0 / width
+
+    # Draw all triangles
+    for cell_id, cell in tiling.cells.items():
+        x_norm, y_norm = cell.position_hint
+
+        # Determine if triangle points up or down
+        pointing_up = (cell.row + cell.col) % 2 == 0
+
+        if pointing_up:
+            # Upward pointing triangle
+            vertices = [
+                (x_norm, y_norm - cell_size * 0.4),
+                (x_norm - cell_size * 0.4, y_norm + cell_size * 0.2),
+                (x_norm + cell_size * 0.4, y_norm + cell_size * 0.2)
+            ]
+        else:
+            # Downward pointing triangle
+            vertices = [
+                (x_norm, y_norm + cell_size * 0.4),
+                (x_norm - cell_size * 0.4, y_norm - cell_size * 0.2),
+                (x_norm + cell_size * 0.4, y_norm - cell_size * 0.2)
+            ]
+
+        triangle = Polygon(
+            vertices,
+            fill=True,
+            facecolor='lightblue',
+            edgecolor='darkblue',
+            linewidth=0.5
+        )
+        ax.add_patch(triangle)
+
+        # Draw cell center
+        ax.plot(x_norm, y_norm, 'k.', markersize=1)
+
+    plt.savefig('triangle_grid_proper.png', dpi=150, bbox_inches='tight')
+    print("Saved triangle_grid_proper.png")
+    plt.close()
+
+
+def create_comparison():
+    """Create side-by-side comparison of all three tilings."""
+    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
+
+    tilings = [
+        (SquareTiling(), "Square (4-connected)", 'square_cell'),
+        (HexTiling(), "Hexagonal (6-connected)", 'hex_cell'),
+        (TriangleTiling(), "Triangular (3-connected)", 'tri_cell')
+    ]
+
+    width, height = 8, 8
+
+    for ax, (tiling_obj, title, prefix) in zip(axes, tilings):
+        tiling_obj.generate_graph(width, height, seed=0)
+
+        ax.set_aspect('equal')
+        ax.set_xlim(-0.05, 1.05)
+        ax.set_ylim(-0.05, 1.05)
+        ax.set_title(title, fontsize=12)
+        ax.set_xticks([])
+        ax.set_yticks([])
+
+        if isinstance(tiling_obj, SquareTiling):
+            cell_size = 1.0 / width
+            for cell in list(tiling_obj.cells.values())[:64]:
+                x, y = cell.position_hint
+                square = mpatches.Rectangle(
+                    (x - cell_size/2, y - cell_size/2),
+                    cell_size, cell_size,
+                    fill=True,
+                    facecolor='lightblue',
+                    edgecolor='darkblue',
+                    linewidth=0.8
+                )
+                ax.add_patch(square)
+
+        elif isinstance(tiling_obj, HexTiling):
+            hex_width_units = width * math.sqrt(3)
+            hex_height_units = height * 1.5 + 0.5
+            size = min(1.0 / hex_width_units, 1.0 / hex_height_units)
+
+            for cell in list(tiling_obj.cells.values())[:64]:
+                x, y = cell.position_hint
+                hexagon = RegularPolygon(
+                    (x, y),
+                    numVertices=6,
+                    radius=size * 0.98,
+                    orientation=math.pi / 2,
+                    facecolor='lightblue',
+                    edgecolor='darkblue',
+                    linewidth=0.8
+                )
+                ax.add_patch(hexagon)
+
+        elif isinstance(tiling_obj, TriangleTiling):
+            cell_size = 1.0 / width
+            for cell in list(tiling_obj.cells.values())[:64]:
+                x, y = cell.position_hint
+                pointing_up = (cell.row + cell.col) % 2 == 0
+
+                if pointing_up:
+                    vertices = [
+                        (x, y - cell_size * 0.4),
+                        (x - cell_size * 0.4, y + cell_size * 0.2),
+                        (x + cell_size * 0.4, y + cell_size * 0.2)
+                    ]
+                else:
+                    vertices = [
+                        (x, y + cell_size * 0.4),
+                        (x - cell_size * 0.4, y - cell_size * 0.2),
+                        (x + cell_size * 0.4, y - cell_size * 0.2)
+                    ]
+
+                triangle = Polygon(
+                    vertices,
+                    fill=True,
+                    facecolor='lightblue',
+                    edgecolor='darkblue',
+                    linewidth=0.8
+                )
+                ax.add_patch(triangle)
+
+    plt.tight_layout()
+    plt.savefig('tiling_comparison.png', dpi=150, bbox_inches='tight')
+    print("Saved tiling_comparison.png")
+    plt.close()
+
+
+if __name__ == "__main__":
+    print("Generating proper grid visualizations...")
+    print("=" * 50)
+
+    visualize_square_grid(10, 10)
+    visualize_hex_grid(10, 10)
+    visualize_triangle_grid(10, 10)
+    create_comparison()
+
+    print("=" * 50)
+    print("All visualizations created!")
+    print("\nGenerated files:")
+    print("  - square_grid_proper.png")
+    print("  - hex_grid_proper.png")
+    print("  - triangle_grid_proper.png")
+    print("  - tiling_comparison.png")
diff --git a/src/v1_1/vlm_sanity_check.py b/src/v1_1/vlm_sanity_check.py
new file mode 100644
index 00000000..42ed0e9a
--- /dev/null
+++ b/src/v1_1/vlm_sanity_check.py
@@ -0,0 +1,560 @@
+"""
+VLM Vision Sanity Check
+
+Tests whether a VLM can see and understand MiniGrid rendered images.
+Two test categories:
+  1. Object Identification: Can the VLM identify objects in the scene?
+  2. Spatial Reasoning: Can the VLM describe spatial relationships?
+
+This is NOT an action prediction test. It validates that the VLM's visual
+encoder correctly perceives the gridworld before we ask it to act.
+
+Usage:
+    python vlm_sanity_check.py --model ollama --ollama-model qwen2.5vl:7b
+    python vlm_sanity_check.py --model lmstudio --lmstudio-model local-model
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import json
+import re
+import urllib.request
+import urllib.error
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+
+try:
+    from PIL import Image
+except ImportError:
+    Image = None
+
+
+@dataclass
+class VisionQuestion:
+    """A single vision question about a rendered scene."""
+    question: str
+    expected_keywords: list[str]  # Keywords the answer should contain
+    category: str  # "object_id" or "spatial"
+    difficulty: int = 1  # 1-3
+
+
+@dataclass
+class VisionTestResult:
+    """Result of a single vision test."""
+    question: str
+    category: str
+    expected_keywords: list[str]
+    model_answer: str
+    matched_keywords: list[str]
+    passed: bool
+    error: str | None = None
+
+
+@dataclass
+class SanityCheckReport:
+    """Full report from a sanity check run."""
+    model_name: str
+    task_id: str
+    total_questions: int
+    passed: int
+    failed: int
+    object_id_score: float  # 0-1
+    spatial_score: float  # 0-1
+    results: list[VisionTestResult] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        return {
+            "model_name": self.model_name,
+            "task_id": self.task_id,
+            "total_questions": self.total_questions,
+            "passed": self.passed,
+            "failed": self.failed,
+            "object_id_score": round(self.object_id_score, 3),
+            "spatial_score": round(self.spatial_score, 3),
+            "results": [
+                {
+                    "question": r.question,
+                    "category": r.category,
+                    "expected_keywords": r.expected_keywords,
+                    "model_answer": r.model_answer,
+                    "matched_keywords": r.matched_keywords,
+                    "passed": r.passed,
+                    "error": r.error,
+                }
+                for r in self.results
+            ],
+        }
+
+
+def generate_questions_for_task(task_spec, grid_state) -> list[VisionQuestion]:
+    """Generate vision questions based on a task specification and its current state.
+
+    Args:
+        task_spec: TaskSpecification for the current task.
+        grid_state: GridState from the backend after reset.
+
+    Returns:
+        List of VisionQuestion objects.
+    """
+    questions = []
+
+    # --- Object Identification ---
+
+    # Agent identification
+    questions.append(VisionQuestion(
+        question="Is there an agent (blue triangle) visible in this image? Describe its appearance.",
+        expected_keywords=["agent", "triangle", "blue"],
+        category="object_id",
+        difficulty=1,
+    ))
+
+    # Goal identification
+    questions.append(VisionQuestion(
+        question="Is there a goal marker (green square) in this image? Where is it located?",
+        expected_keywords=["goal", "green"],
+        category="object_id",
+        difficulty=1,
+    ))
+
+    # Wall identification
+    if task_spec.maze.walls:
+        questions.append(VisionQuestion(
+            question="Are there walls (grey barriers) in this gridworld? Describe what you see.",
+            expected_keywords=["wall", "grey", "gray", "barrier"],
+            category="object_id",
+            difficulty=1,
+        ))
+
+    # Key identification
+    if task_spec.mechanisms.keys:
+        key_colors = [k.color for k in task_spec.mechanisms.keys]
+        questions.append(VisionQuestion(
+            question="Are there any keys visible in the image? What color are they?",
+            expected_keywords=["key"] + key_colors,
+            category="object_id",
+            difficulty=1,
+        ))
+
+    # Door identification
+    if task_spec.mechanisms.doors:
+        door_colors = [d.requires_key for d in task_spec.mechanisms.doors]
+        questions.append(VisionQuestion(
+            question="Are there any doors visible in the image? What color are they?",
+            expected_keywords=["door"] + door_colors,
+            category="object_id",
+            difficulty=1,
+        ))
+
+    # Switch identification
+    if task_spec.mechanisms.switches:
+        questions.append(VisionQuestion(
+            question="Is there a switch or button (yellow ball) in this image?",
+            expected_keywords=["switch", "button", "yellow", "ball"],
+            category="object_id",
+            difficulty=2,
+        ))
+
+    # Hazard identification
+    if task_spec.mechanisms.hazards:
+        questions.append(VisionQuestion(
+            question="Are there any hazards (red/orange lava tiles) visible in this image?",
+            expected_keywords=["hazard", "lava", "red", "orange", "danger"],
+            category="object_id",
+            difficulty=2,
+        ))
+
+    # --- Spatial Reasoning ---
+
+    # Grid dimensions
+    w, h = task_spec.maze.dimensions
+    questions.append(VisionQuestion(
+        question=f"This is a {w}x{h} gridworld. How many columns and rows do you see?",
+        expected_keywords=[str(w), str(h), "grid"],
+        category="spatial",
+        difficulty=2,
+    ))
+
+    # Agent direction
+    dir_names = {0: "right", 1: "down", 2: "left", 3: "up"}
+    agent_dir = grid_state.agent_direction
+    questions.append(VisionQuestion(
+        question="Which direction is the agent (blue triangle) facing? (up, down, left, or right)",
+        expected_keywords=[dir_names.get(agent_dir, "right")],
+        category="spatial",
+        difficulty=2,
+    ))
+
+    # Goal relative to agent
+    ax, ay = grid_state.agent_position
+    gx, gy = task_spec.maze.goal.x, task_spec.maze.goal.y
+    rel_parts = []
+    if gy < ay:
+        rel_parts.append("above")
+    elif gy > ay:
+        rel_parts.append("below")
+    if gx > ax:
+        rel_parts.append("right")
+    elif gx < ax:
+        rel_parts.append("left")
+    if not rel_parts:
+        rel_parts = ["same"]
+
+    questions.append(VisionQuestion(
+        question="Where is the goal (green square) relative to the agent (blue triangle)? Is it above, below, left, or right?",
+        expected_keywords=rel_parts,
+        category="spatial",
+        difficulty=2,
+    ))
+
+    # Object count
+    total_objects = (
+        len(task_spec.mechanisms.keys)
+        + len(task_spec.mechanisms.doors)
+        + len(task_spec.mechanisms.switches)
+        + len(task_spec.mechanisms.gates)
+        + len(task_spec.mechanisms.blocks)
+        + len(task_spec.mechanisms.hazards)
+    )
+    if total_objects > 0:
+        questions.append(VisionQuestion(
+            question="How many interactive objects (keys, doors, switches, blocks, hazards) do you see? Give an approximate count.",
+            expected_keywords=[str(total_objects)],
+            category="spatial",
+            difficulty=3,
+        ))
+
+    return questions
+
+
+def check_answer(answer: str, expected_keywords: list[str]) -> tuple[bool, list[str]]:
+    """Check if an answer contains expected keywords.
+
+    Uses case-insensitive matching. An answer passes if it matches
+    at least one keyword from the list.
+
+    Returns:
+        (passed, list of matched keywords)
+    """
+    answer_lower = answer.lower()
+    matched = [kw for kw in expected_keywords if kw.lower() in answer_lower]
+    return len(matched) > 0, matched
+
+
+def ask_vlm_ollama(
+    image: np.ndarray,
+    question: str,
+    model: str = "qwen2.5vl:7b",
+    base_url: str = "http://localhost:11434",
+) -> str:
+    """Ask a vision question to an Ollama VLM.
+
+    Args:
+        image: RGB image array (H, W, 3)
+        question: Text question about the image
+        model: Ollama model name
+        base_url: Ollama server URL
+
+    Returns:
+        Model's text response
+    """
+    if Image is None:
+        raise ImportError("PIL (Pillow) required: pip install Pillow")
+
+    img = Image.fromarray(image)
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+
+    prompt = (
+        "You are looking at a rendered gridworld environment from MiniGrid. "
+        "The image shows a top-down view of a grid with various objects.\n\n"
+        "Common objects:\n"
+        "- Agent: blue triangle pointing in its facing direction\n"
+        "- Goal: green square\n"
+        "- Walls: grey squares\n"
+        "- Keys: small colored key shapes\n"
+        "- Doors: colored rectangles that block passages\n"
+        "- Switches: yellow balls\n"
+        "- Hazards: red/orange tiles (lava)\n\n"
+        f"Question: {question}\n\n"
+        "Answer concisely."
+    )
+
+    payload = {
+        "model": model,
+        "prompt": prompt,
+        "images": [img_b64],
+        "stream": False,
+        "options": {"temperature": 0.0, "num_predict": 256},
+    }
+
+    req = urllib.request.Request(
+        f"{base_url.rstrip('/')}/api/generate",
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    with urllib.request.urlopen(req, timeout=120) as resp:
+        result = json.loads(resp.read().decode("utf-8"))
+
+    return result.get("response", "")
+
+
+def ask_vlm_lmstudio(
+    image: np.ndarray,
+    question: str,
+    model: str = "local-model",
+    base_url: str = "http://localhost:1234",
+) -> str:
+    """Ask a vision question to an LM Studio VLM via OpenAI-compatible API."""
+    if Image is None:
+        raise ImportError("PIL (Pillow) required: pip install Pillow")
+
+    img = Image.fromarray(image)
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+
+    system_msg = (
+        "You are looking at a rendered gridworld environment from MiniGrid. "
+        "Common objects: agent (blue triangle), goal (green square), "
+        "walls (grey), keys (colored key shapes), doors (colored rectangles), "
+        "switches (yellow balls), hazards (red/orange lava)."
+    )
+
+    payload = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": system_msg},
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": question},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/png;base64,{img_b64}"},
+                    },
+                ],
+            },
+        ],
+        "temperature": 0.0,
+        "max_tokens": 256,
+    }
+
+    req = urllib.request.Request(
+        f"{base_url.rstrip('/')}/v1/chat/completions",
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    with urllib.request.urlopen(req, timeout=120) as resp:
+        result = json.loads(resp.read().decode("utf-8"))
+
+    return result["choices"][0]["message"]["content"]
+
+
+def run_sanity_check(
+    task_path: str,
+    ask_fn,
+    model_name: str = "unknown",
+    verbose: bool = True,
+) -> SanityCheckReport:
+    """Run a full sanity check on a task.
+
+    Args:
+        task_path: Path to task JSON file
+        ask_fn: Function(image, question) -> str that queries the VLM
+        model_name: Name for reporting
+        verbose: Print results as they come
+
+    Returns:
+        SanityCheckReport with all results
+    """
+    import sys
+    import os
+
+    _sd = os.path.abspath(os.path.dirname(__file__))
+    if _sd not in sys.path:
+        sys.path.insert(0, _sd)
+
+    from gridworld.task_spec import TaskSpecification
+    from gridworld.backends.minigrid_backend import MiniGridBackend
+
+    spec = TaskSpecification.from_json(task_path)
+    backend = MiniGridBackend(render_mode="rgb_array")
+    backend.configure(spec)
+    obs, state, _ = backend.reset(seed=spec.seed)
+
+    questions = generate_questions_for_task(spec, state)
+    results = []
+
+    if verbose:
+        print(f"\n=== VLM Sanity Check: {spec.task_id} ===")
+        print(f"Model: {model_name}")
+        print(f"Questions: {len(questions)}")
+        print()
+
+    for q in questions:
+        try:
+            answer = ask_fn(obs, q.question)
+            passed, matched = check_answer(answer, q.expected_keywords)
+            result = VisionTestResult(
+                question=q.question,
+                category=q.category,
+                expected_keywords=q.expected_keywords,
+                model_answer=answer.strip(),
+                matched_keywords=matched,
+                passed=passed,
+            )
+        except Exception as e:
+            result = VisionTestResult(
+                question=q.question,
+                category=q.category,
+                expected_keywords=q.expected_keywords,
+                model_answer="",
+                matched_keywords=[],
+                passed=False,
+                error=str(e),
+            )
+
+        results.append(result)
+
+        if verbose:
+            status = "PASS" if result.passed else "FAIL"
+            print(f"[{status}] [{q.category}] {q.question}")
+            if result.error:
+                print(f"  ERROR: {result.error}")
+            else:
+                print(f"  Answer: {result.model_answer[:120]}...")
+                print(f"  Matched: {result.matched_keywords} / Expected: {q.expected_keywords}")
+            print()
+
+    # Compute scores
+    obj_results = [r for r in results if r.category == "object_id"]
+    spatial_results = [r for r in results if r.category == "spatial"]
+
+    obj_score = sum(r.passed for r in obj_results) / max(len(obj_results), 1)
+    spatial_score = sum(r.passed for r in spatial_results) / max(len(spatial_results), 1)
+
+    report = SanityCheckReport(
+        model_name=model_name,
+        task_id=spec.task_id,
+        total_questions=len(results),
+        passed=sum(r.passed for r in results),
+        failed=sum(not r.passed for r in results),
+        object_id_score=obj_score,
+        spatial_score=spatial_score,
+        results=results,
+    )
+
+    if verbose:
+        print(f"=== Results ===")
+        print(f"Total: {report.passed}/{report.total_questions}")
+        print(f"Object ID: {report.object_id_score:.0%}")
+        print(f"Spatial:   {report.spatial_score:.0%}")
+
+    return report
+
+
+def run_sanity_check_all_tiers(
+    ask_fn,
+    model_name: str = "unknown",
+    tasks_dir: str = "gridworld/tasks",
+    verbose: bool = True,
+) -> list[SanityCheckReport]:
+    """Run sanity check across representative tasks from each tier.
+
+    Picks one task per tier for efficiency.
+    """
+    from pathlib import Path
+    tasks_path = Path(tasks_dir)
+    reports = []
+
+    # Pick one representative task per tier
+    representative_tasks = {
+        1: "maze_rooms_003.json",      # Walls only
+        2: "colored_doors_003.json",    # Keys + doors
+        3: "key_switch_001.json",       # Keys + doors + switches + gates
+        4: "push_block_001.json",       # Blocks
+        5: "memory_003.json",           # Multi-mechanism
+    }
+
+    for tier, task_file in sorted(representative_tasks.items()):
+        task_path = tasks_path / f"tier{tier}" / task_file
+        if not task_path.exists():
+            if verbose:
+                print(f"[SKIP] Tier {tier}: {task_file} not found")
+            continue
+
+        report = run_sanity_check(
+            str(task_path), ask_fn, model_name, verbose
+        )
+        reports.append(report)
+
+    if verbose and reports:
+        print(f"\n=== Overall Summary ({model_name}) ===")
+        avg_obj = sum(r.object_id_score for r in reports) / len(reports)
+        avg_spatial = sum(r.spatial_score for r in reports) / len(reports)
+        avg_total = sum(r.passed for r in reports) / sum(r.total_questions for r in reports)
+        print(f"Average Object ID:  {avg_obj:.0%}")
+        print(f"Average Spatial:    {avg_spatial:.0%}")
+        print(f"Average Total:      {avg_total:.0%}")
+
+    return reports
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="VLM Vision Sanity Check")
+    parser.add_argument("--model", choices=["ollama", "lmstudio"], default="ollama")
+    parser.add_argument("--ollama-model", default="qwen2.5vl:7b")
+    parser.add_argument("--lmstudio-model", default="local-model")
+    parser.add_argument("--base-url", default=None)
+    parser.add_argument("--task", default=None, help="Specific task JSON path")
+    parser.add_argument("--all-tiers", action="store_true", help="Run across all tiers")
+    parser.add_argument("--output", default=None, help="Save results JSON")
+    args = parser.parse_args()
+
+    # Build ask function
+    if args.model == "ollama":
+        base_url = args.base_url or "http://localhost:11434"
+        vlm_model = args.ollama_model
+        model_name = f"ollama_{vlm_model}"
+
+        def ask_fn(image, question):
+            return ask_vlm_ollama(image, question, model=vlm_model, base_url=base_url)
+
+    elif args.model == "lmstudio":
+        base_url = args.base_url or "http://localhost:1234"
+        vlm_model = args.lmstudio_model
+        model_name = f"lmstudio_{vlm_model}"
+
+        def ask_fn(image, question):
+            return ask_vlm_lmstudio(image, question, model=vlm_model, base_url=base_url)
+
+    if args.all_tiers:
+        reports = run_sanity_check_all_tiers(ask_fn, model_name)
+        if args.output:
+            with open(args.output, "w") as f:
+                json.dump([r.to_dict() for r in reports], f, indent=2)
+            print(f"\nResults saved to {args.output}")
+    elif args.task:
+        report = run_sanity_check(args.task, ask_fn, model_name)
+        if args.output:
+            with open(args.output, "w") as f:
+                json.dump(report.to_dict(), f, indent=2)
+            print(f"\nResults saved to {args.output}")
+    else:
+        # Default: run on a tier 2 task (has keys + doors, good visual variety)
+        default_task = "gridworld/tasks/tier2/colored_doors_003.json"
+        report = run_sanity_check(default_task, ask_fn, model_name)
+        if args.output:
+            with open(args.output, "w") as f:
+                json.dump(report.to_dict(), f, indent=2)
+            print(f"\nResults saved to {args.output}")