From 1494b14d46de749fff913364d31b3b4b8a94b3bc Mon Sep 17 00:00:00 2001 From: r2dedios Date: Fri, 6 Feb 2026 18:20:07 +0100 Subject: [PATCH 1/3] chore(rh-virt): Added vm-creator/lifecycle-manager/inventory Skills for openshift-mcp-server Signed-off-by: r2dedios --- .claude-plugin/marketplace.json | 12 + docs/mcp.json | 13 + rh-virt/.claude-plugin/plugin.json | 13 + rh-virt/.mcp.json | 11 + rh-virt/README.md | 333 ++++++++- rh-virt/docs/troubleshooting.md | 440 ++++++++++++ rh-virt/skills/vm-creator/SKILL.md | 709 +++++++++++++++++++ rh-virt/skills/vm-inventory/SKILL.md | 601 ++++++++++++++++ rh-virt/skills/vm-lifecycle-manager/SKILL.md | 586 +++++++++++++++ 9 files changed, 2716 insertions(+), 2 deletions(-) create mode 100644 rh-virt/.claude-plugin/plugin.json create mode 100644 rh-virt/.mcp.json create mode 100644 rh-virt/docs/troubleshooting.md create mode 100644 rh-virt/skills/vm-creator/SKILL.md create mode 100644 rh-virt/skills/vm-inventory/SKILL.md create mode 100644 rh-virt/skills/vm-lifecycle-manager/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 82c0c59..446352b 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -33,6 +33,18 @@ "category": "developer", "agents": ["./agents/error-handling.md", "./agents/s2i-builder-images.md"], "skills": "./skills" + }, + { + "name": "openshift-virtualization", + "description": "OpenShift Virtualization Agentic Collection", + "version": "1.0.0", + "author": { + "name": "Red Hat Ecosystem Engineering", + "email": "eco-engineering@redhat.com" + }, + "source": "./rh-virt", + "category": "virtualization", + "skills": "./skills" } ] } \ No newline at end of file diff --git a/docs/mcp.json b/docs/mcp.json index 3e94948..f9eef24 100644 --- a/docs/mcp.json +++ b/docs/mcp.json @@ -172,5 +172,18 @@ "description": "Get the current status of an Ansible playbook execution job. Returns job status, timestamps, and completion information." 
} ] + }, + "openshift-virtualization": { + "repository": "https://github.com/openshift/openshift-mcp-server", + "tools": [ + { + "name": "vm_create", + "description": "Create new VirtualMachine resources in OpenShift Virtualization. Accepts namespace (required), name (required), workload/OS (default: fedora), size (small/medium/large), storage (default: 30Gi), autostart flag (default: false), optional networks (Multus NetworkAttachmentDefinitions), performance family hint (u1/o1/c1/m1), instance type, and preference. Automatically resolves instance types, preferences, and container disk images for common OS distributions (fedora, ubuntu, centos, centos-stream, debian, rhel, opensuse). Returns created VirtualMachine resource or error details." + }, + { + "name": "vm_lifecycle", + "description": "Manage VirtualMachine power state and lifecycle operations in OpenShift Virtualization. Accepts namespace (required), name (required), and action (required: start/stop/restart). Start sets runStrategy to Always (VM runs and auto-restarts on crash), stop sets runStrategy to Halted (VM stops and stays off), restart performs stop then start sequence. Returns updated VirtualMachine state or error if VM not found, permission denied, or already in desired state." 
+ } + ] } } \ No newline at end of file diff --git a/rh-virt/.claude-plugin/plugin.json b/rh-virt/.claude-plugin/plugin.json new file mode 100644 index 0000000..8c2986f --- /dev/null +++ b/rh-virt/.claude-plugin/plugin.json @@ -0,0 +1,13 @@ +{ + "name": "openshift-virtualization", + "version": "1.0.0", + "description": "OpenShift Virtualization Agentic Collection", + "author": { + "name": "Red Hat Ecosystem Engineering", + "email": "eco-engineering@redhat.com" + }, + "homepage": "https://github.com/RHEcosystemAppEng/agentic-collections", + "repository": "https://github.com/RHEcosystemAppEng/agentic-collections", + "license": "Apache-2.0", + "keywords": ["red-hat", "kubevirt", "openshift", "virtualization"] +} diff --git a/rh-virt/.mcp.json b/rh-virt/.mcp.json new file mode 100644 index 0000000..7fd261b --- /dev/null +++ b/rh-virt/.mcp.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "openshift-virtualization": { + "command": "npx", + "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "env": { + "KUBECONFIG": "${KUBECONFIG}" + } + } + } +} diff --git a/rh-virt/README.md b/rh-virt/README.md index cf5b40f..4bef002 100644 --- a/rh-virt/README.md +++ b/rh-virt/README.md @@ -1,6 +1,335 @@ # Red Hat Virtualization Agentic Pack -Virtualization management and administration tools for Red Hat virtualization platforms. This pack provides automation capabilities for VM lifecycle management, resource allocation, hypervisor configuration, and virtual infrastructure operations. +OpenShift Virtualization management tools for administering virtual machines on OpenShift clusters. This pack provides automation capabilities for VM lifecycle management, provisioning, and inventory operations using KubeVirt. 
-**Persona**: Virtualization Administrator +**Persona**: Virtualization Administrator, OpenShift Administrator **Marketplaces**: Claude Code, Cursor + +## Overview + +The rh-virt collection provides specialized tools for managing virtual machines in OpenShift Virtualization environments: + +- **3 specialized skills** for VM administration tasks +- **OpenShift MCP server integration** for KubeVirt operations +- **VM lifecycle management** from creation to operational monitoring + +## Quick Start + +### Prerequisites + +- Claude Code CLI or IDE extension +- OpenShift cluster (>= 4.19) with Virtualization operator installed +- ServiceAccount with appropriate RBAC permissions for VirtualMachine resources +- KUBECONFIG environment variable configured with cluster access + +### Environment Setup + +Configure OpenShift cluster access: + +```bash +export KUBECONFIG="/path/to/your/kubeconfig" +``` + +Verify access to the cluster: + +```bash +oc get virtualmachines -A +# or +kubectl get vms -A +``` + +### Installation (Claude Code) + +Install the pack as a Claude Code plugin: + +```bash +claude plugin marketplace add https://github.com/RHEcosystemAppEng/agentic-collections +claude plugin install openshift-virtualization +``` + +Or for local development: + +```bash +claude plugin marketplace add /path/to/agentic-collections +claude plugin install openshift-virtualization +``` + +## Skills + +The pack provides 3 specialized skills for common virtualization operations: + +### 1. **vm-creator** - Virtual Machine Provisioning + +Create new virtual machines in OpenShift Virtualization with automatic error diagnosis and workarounds. 
+ +**Use when:** +- "Create a new VM" +- "Deploy a virtual machine" +- "Provision a VM with specific configuration" + +**What it does:** +- Creates VirtualMachine resources +- Configures instance specifications +- Sets up storage and networking +- **Automatically diagnoses scheduling issues** (e.g., node taints, resource constraints) +- **Proposes workarounds** for MCP tool limitations +- **Applies fixes** with user confirmation (human-in-the-loop) + +### 2. **vm-lifecycle-manager** - VM Power Management + +Control VM lifecycle operations including start, stop, and restart. + +**Use when:** +- "Start VM [name]" +- "Stop the virtual machine [name]" +- "Restart VM [name]" + +**What it does:** +- Starts stopped/halted VMs +- Stops running VMs gracefully +- Restarts VMs (stop + start sequence) +- Manages VM runStrategy transitions + +### 3. **vm-inventory** - VM Discovery and Status + +List and inspect virtual machines across namespaces. + +**Use when:** +- "List all VMs" +- "Show VMs in namespace [name]" +- "Get details of VM [name]" +- "What VMs are running?" + +**What it does:** +- Lists VMs across namespaces +- Shows VM status and health +- Provides detailed VM configuration +- Filters VMs by labels or fields + +## MCP Server Integration + +The pack integrates with the OpenShift MCP server (configured in `.mcp.json`): + +### **openshift-virtualization** - OpenShift MCP Server (KubeVirt Toolset) + +Provides access to KubeVirt virtual machine operations through the Model Context Protocol. 
+ +**Repository**: https://github.com/openshift/openshift-mcp-server + +**Available Tools**: +- `vm_create` - Create new VirtualMachines +- `vm_lifecycle` - Manage VM power state (start/stop/restart) + +**Configuration**: +```json +{ + "mcpServers": { + "openshift-virtualization": { + "command": "npx", + "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "env": { + "KUBECONFIG": "${KUBECONFIG}" + } + } + } +} +``` + +## Sample Workflows + +### Workflow 1: Create and Start VM + +``` +User: "Create a VM called web-server in namespace production" +→ vm-creator skill creates the VM + +User: "Start the web-server VM" +→ vm-lifecycle-manager skill starts the VM + +User: "Check if it's running" +→ vm-inventory skill shows VM status +``` + +### Workflow 2: VM Inventory Check + +``` +User: "Show all VMs in production namespace" +→ vm-inventory skill lists all VMs with status + +User: "What's the status of database-vm?" +→ vm-inventory skill shows detailed VM information +``` + +### Workflow 3: VM Lifecycle Management + +``` +User: "Stop all VMs in development namespace" +→ vm-lifecycle-manager skill stops each VM + +User: "Restart the api-server VM" +→ vm-lifecycle-manager skill restarts the VM +``` + +### Workflow 4: Automatic Error Diagnosis and Remediation + +``` +User: "Create a Fedora VM called test-vm in namespace demo" +→ vm-creator skill creates the VM +→ Detects ErrorUnschedulable status +→ Consults troubleshooting.md documentation +→ Diagnoses: Node taints prevent scheduling +→ Proposes workaround: Add tolerations to VM spec + +Agent: "⚠️ VM Scheduling Issue Detected + Root Cause: Node taints prevent VM scheduling + + I can apply a workaround to add the required tolerations. + How would you like to proceed?" 
+ +User: "apply workaround" +→ vm-creator patches VM with tolerations +→ Verifies VM can now be scheduled +→ Reports success + +Agent: "✓ Workaround Applied Successfully + VM can now be scheduled on virtualization nodes" +``` + +**Key Features**: +- **Automatic diagnosis**: Detects ErrorUnschedulable and other common errors +- **Documentation consultation**: Reads troubleshooting.md for domain knowledge +- **Intelligent workarounds**: Proposes fixes for MCP tool limitations +- **Human-in-the-loop**: Requires explicit user confirmation before applying patches +- **Transparent**: Explains temporary limitations and suggests filing enhancement requests + +## Configuration + +MCP server is configured in `.mcp.json`: + +```json +{ + "mcpServers": { + "openshift-virtualization": { + "command": "npx", + "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "env": { + "KUBECONFIG": "${KUBECONFIG}" + } + } + } +} +``` + +**Key Configuration Notes**: +- Uses `KUBECONFIG` environment variable for cluster authentication +- Requires OpenShift Virtualization operator installed on the cluster +- ServiceAccount needs RBAC permissions for VirtualMachine resources + +## Troubleshooting + +### Automatic Diagnosis (Recommended) + +The **vm-creator** skill includes automatic error diagnosis and workaround proposals. When VMs encounter scheduling issues: + +1. **Detection**: Skill automatically detects ErrorUnschedulable and other error states +2. **Diagnosis**: Consults `docs/troubleshooting.md` to understand root cause +3. **Investigation**: Executes diagnostic commands (node taints, resource availability, events) +4. **Proposal**: Presents clear diagnosis with workaround options +5. 
**Remediation**: Applies fix with user confirmation (human-in-the-loop) + +**Common Issues Handled**: +- **ErrorUnschedulable** - Node taints/tolerations mismatch, resource constraints, node selector issues +- **ErrorDataVolumeNotReady** - Storage provisioning delays, storage class issues, quota exceeded + +**For comprehensive troubleshooting guidance**, see [docs/troubleshooting.md](docs/troubleshooting.md). + +### MCP Server Won't Start + +**Problem**: Server fails to connect to cluster + +**Solutions**: +1. Verify KUBECONFIG is set: `echo $KUBECONFIG` +2. Test cluster access: `oc get nodes` or `kubectl get nodes` +3. Check ServiceAccount permissions: `oc auth can-i create virtualmachines -A` + +### VM Operations Fail + +**Problem**: VM creation or lifecycle operations return errors + +**Solutions**: +1. Verify OpenShift Virtualization operator is installed +2. Check namespace exists and ServiceAccount has access +3. Verify RBAC permissions for VirtualMachine resources +4. Check cluster resource availability (CPU, memory, storage) +5. Let vm-creator skill run automatic diagnosis (see Workflow 4 above) + +### Skills Not Triggering + +**Problem**: Skills don't activate on expected queries + +**Solutions**: +1. Verify plugin installed: `claude plugin list` +2. Reload Claude Code to refresh plugins +3. Check skill descriptions match query intent +4. 
Use explicit phrasing from skill examples + +## Architecture Reference + +### Directory Structure + +``` +rh-virt/ +├── README.md # This file +├── .claude-plugin/ +│ └── plugin.json # Plugin metadata +├── .mcp.json # MCP server configuration +├── docs/ # AI-optimized knowledge base +│ └── troubleshooting.md # VM error diagnosis and workarounds +└── skills/ + ├── vm-creator/SKILL.md # VM provisioning with auto-diagnosis + ├── vm-lifecycle-manager/SKILL.md # VM power management + └── vm-inventory/SKILL.md # VM discovery and status +``` + +### Key Patterns + +- **Skills encapsulate operations** - Each skill handles one category of VM tasks +- **MCP provides tools** - OpenShift MCP server exposes KubeVirt operations +- **Environment-based auth** - KUBECONFIG for secure cluster access +- **Automatic diagnosis** - Skills detect errors, consult docs, propose workarounds +- **Document consultation** - Skills read troubleshooting.md for domain knowledge +- **Human-in-the-loop** - User approval required before applying fixes +- **Workaround transparency** - Clear communication of MCP tool limitations and temporary solutions + +## Security Model + +**Cluster access**: +- Uses KUBECONFIG for authentication +- Respects Kubernetes RBAC permissions +- ServiceAccount-based authorization +- No credential storage or caching + +**VM operations**: +- Namespace isolation enforced +- Resource quotas respected +- All operations audited in Kubernetes API logs + +## Development + +See main repository [README.md](../README.md) for: +- Adding new skills +- Creating agents +- Integrating MCP servers +- Testing and validation + +## License + +[Apache 2.0](../LICENSE) + +## References + +- [OpenShift Virtualization Documentation](https://docs.openshift.com/container-platform/latest/virt/about_virt/about-virt.html) +- [KubeVirt User Guide](https://kubevirt.io/user-guide/) +- [OpenShift MCP Server](https://github.com/openshift/openshift-mcp-server) +- [MCP Protocol 
Specification](https://modelcontextprotocol.io/) +- [Main Repository](https://github.com/RHEcosystemAppEng/agentic-collections) diff --git a/rh-virt/docs/troubleshooting.md b/rh-virt/docs/troubleshooting.md new file mode 100644 index 0000000..f92c990 --- /dev/null +++ b/rh-virt/docs/troubleshooting.md @@ -0,0 +1,440 @@ +--- +title: VM Troubleshooting Guide +category: kubevirt +sources: + - title: KubeVirt User Guide - Node Placement + url: https://kubevirt.io/user-guide/virtual_machines/node_placement/ + date_accessed: 2026-02-06 + - title: Kubernetes Taints and Tolerations + url: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + date_accessed: 2026-02-06 + - title: OpenShift Virtualization - Virtual Machine Status + url: https://docs.openshift.com/container-platform/latest/virt/virtual_machines/virt-managing-vms.html + date_accessed: 2026-02-06 +tags: [troubleshooting, scheduling, taints, tolerations, errors] +semantic_keywords: [ErrorUnschedulable, ErrorDataVolumeNotReady, scheduling failure, node taints, VM status] +use_cases: [vm-creation, diagnostics, error-handling] +last_updated: 2026-02-06 +--- + +# VM Troubleshooting Guide + +## Overview + +This guide provides diagnostic procedures and workarounds for common VirtualMachine errors in OpenShift Virtualization. Use this document when VMs fail to schedule, provision, or start properly. + +## Common VM Status Errors + +### ErrorUnschedulable + +**Symptom**: VM shows status `ErrorUnschedulable` after creation + +**Description**: The Kubernetes scheduler cannot find a suitable node to run the VM's underlying virt-launcher pod. + +**Possible Causes**: + +#### 1. Node Taints (Most Common) + +Nodes have taints that the VM doesn't tolerate. Common in environments with dedicated virtualization infrastructure. 
+ +**Diagnostic Commands**: +```bash +# Check VM events for scheduling failures +oc describe vm -n | grep -A 10 "Events:" + +# Look for messages like: +# "0/X nodes are available: X node(s) had taints that the pod didn't tolerate" + +# Check node taints in the cluster +oc get nodes -o json | jq '.items[] | select(.spec.taints != null) | {name: .metadata.name, taints: .spec.taints}' + +# Alternative: Show taints in table format +oc get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints +``` + +**Common Taint Patterns**: +- `virtualization=true:NoSchedule` - Only VMs with matching toleration can schedule +- `node-role.kubernetes.io/infra:NoSchedule` - Infrastructure-only nodes +- `node.kubernetes.io/not-ready:NoSchedule` - Node not ready for workloads + +**Solution - Add Tolerations to VM**: + +The openshift-virtualization MCP server's `vm_create` tool does NOT currently support the `tolerations` parameter. This requires a manual workaround: + +**Workaround (post-creation patch)**: +```bash +# Patch the VirtualMachine to add tolerations +oc patch vm -n --type=merge -p ' +spec: + template: + spec: + tolerations: + - key: "virtualization" + operator: "Equal" + value: "true" + effect: "NoSchedule" +' + +# Verify tolerations were added +oc get vm -n -o jsonpath='{.spec.template.spec.tolerations}' | jq + +# Check if VM status improved (wait 5-10 seconds) +oc get vm -n -o jsonpath='{.status.printableStatus}' +``` + +**Example - Multiple Tolerations**: +```bash +oc patch vm -n --type=merge -p ' +spec: + template: + spec: + tolerations: + - key: "virtualization" + operator: "Equal" + value: "true" + effect: "NoSchedule" + - key: "dedicated" + operator: "Equal" + value: "virt-workloads" + effect: "NoSchedule" +' +``` + +**Toleration Operators**: +- `Equal` - Key and value must match exactly +- `Exists` - Only key must exist (ignores value) + +**Toleration Effects**: +- `NoSchedule` - Don't schedule new pods (existing pods continue) +- `PreferNoSchedule` - Avoid 
scheduling if possible +- `NoExecute` - Don't schedule AND evict existing pods + +**Alternative Solutions**: +1. **Remove node taints** (if you have cluster-admin access): + ```bash + oc adm taint nodes <node-name> virtualization=true:NoSchedule- + ``` + +2. **Use different nodes** - If non-tainted nodes exist, ensure VM fits + +3. **File enhancement request** - Request tolerations support in openshift-mcp-server: + https://github.com/openshift/openshift-mcp-server/issues + +#### 2. Insufficient Resources + +Not enough CPU, memory, or storage available on any node. + +**Diagnostic Commands**: +```bash +# Check VM resource requests +oc get vm <vm-name> -n <namespace> -o jsonpath='{.spec.template.spec.domain.resources}' + +# Check node resource availability +oc describe nodes | grep -A 5 "Allocated resources" + +# Look for VM events mentioning "Insufficient" +oc describe vm <vm-name> -n <namespace> | grep "Insufficient" +``` + +**Example Event**: +``` +0/5 nodes are available: 2 Insufficient cpu, 3 Insufficient memory. +``` + +**Solutions**: +1. **Scale cluster** - Add more worker nodes +2. **Reduce VM resources** - Delete and recreate with smaller instance type +3. **Delete unused VMs** - Free up resources +4. **Check resource quotas**: + ```bash + oc describe quota -n <namespace> + oc describe limitrange -n <namespace> + ``` + +#### 3. Node Selector Mismatch + +VM requires specific node labels that don't exist in the cluster. 
+ +**Diagnostic Commands**: +```bash +# Check VM node selector requirements +oc get vm <vm-name> -n <namespace> -o jsonpath='{.spec.template.spec.nodeSelector}' + +# List available node labels +oc get nodes --show-labels + +# Check if any nodes match the selector +oc get nodes -l <label-key>=<label-value> +``` + +**Solution**: +Remove node selector or add required labels to nodes: +```bash +# Option 1: Remove node selector from VM +oc patch vm <vm-name> -n <namespace> --type=json -p '[{"op": "remove", "path": "/spec/template/spec/nodeSelector"}]' + +# Option 2: Add label to nodes +oc label node <node-name> <label-key>=<label-value> +``` + +--- + +### ErrorDataVolumeNotReady + +**Symptom**: VM shows status `ErrorDataVolumeNotReady` + +**Description**: The DataVolume (persistent storage) backing the VM is not ready. + +**Possible Causes**: + +#### 1. DataVolume Still Provisioning + +Storage provisioning takes time, especially for large disks or when importing images. + +**Diagnostic Commands**: +```bash +# Check DataVolume status +oc get datavolume -n <namespace> + +# Look for status: Pending, ImportScheduled, ImportInProgress +# Wait for status: Succeeded + +# Get detailed DataVolume information +oc describe datavolume <datavolume-name> -n <namespace> + +# Check PVC (PersistentVolumeClaim) bound status +oc get pvc -n <namespace> +``` + +**Solution**: Wait for DataVolume provisioning to complete (can take 1-5 minutes). + +#### 2. Storage Class Not Found + +The requested storage class doesn't exist in the cluster. + +**Diagnostic Commands**: +```bash +# List available storage classes +oc get storageclass + +# Check DataVolume's requested storage class +oc get datavolume <datavolume-name> -n <namespace> -o jsonpath='{.spec.pvc.storageClassName}' +``` + +**Solution**: +1. Use a valid storage class from the cluster +2. Recreate VM with correct storage class parameter + +#### 3. Insufficient Storage Quota + +Namespace has insufficient storage quota to provision the PVC. 
+ +**Diagnostic Commands**: +```bash +# Check resource quotas +oc describe quota -n <namespace> + +# Check storage usage +oc get pvc -n <namespace> -o custom-columns=NAME:.metadata.name,STORAGE:.spec.resources.requests.storage,STATUS:.status.phase +``` + +**Solution**: +1. Request quota increase from cluster admin +2. Delete unused PVCs to free quota +3. Reduce VM storage size + +--- + +### ErrorPvcNotFound + +**Symptom**: VM references a PersistentVolumeClaim that doesn't exist. + +**Diagnostic Commands**: +```bash +# List PVCs in namespace +oc get pvc -n <namespace> + +# Check VM's PVC references +oc get vm <vm-name> -n <namespace> -o jsonpath='{.spec.template.spec.volumes[*].persistentVolumeClaim.claimName}' +``` + +**Solution**: +- Wait for DataVolume to create the PVC +- Manually create missing PVC +- Fix VM spec to reference correct PVC name + +--- + +## Workaround Patterns for MCP Tool Limitations + +### General Pattern: Diagnose → Propose → Confirm → Execute + +When the MCP tool lacks feature support (e.g., tolerations, node selectors, advanced networking): + +**1. Diagnose** the root cause using Kubernetes commands +```bash +oc describe vm <vm-name> -n <namespace> +oc get events -n <namespace> --field-selector involvedObject.name=<vm-name> +``` + +**2. Propose** a manual workaround to the user with clear explanation +- Explain why the issue occurred +- Show what will be changed +- List alternative options + +**3. Confirm** with user before executing (Human-in-the-Loop) +- Wait for explicit "yes" or "apply workaround" +- Never auto-execute modifications + +**4. Execute** the workaround using oc/kubectl +```bash +oc patch vm <vm-name> -n <namespace> --type=merge -p '...' +``` + +**5. Verify** the fix was successful +```bash +oc get vm <vm-name> -n <namespace> -o jsonpath='{.status.printableStatus}' +``` + +**6. 
Document** the limitation and suggest filing enhancement request +- Note this is temporary until MCP tool is enhanced +- Provide link to file issue: https://github.com/openshift/openshift-mcp-server/issues + +### Example: Adding Tolerations Workaround + +**Diagnostic Output**: +```markdown +## ⚠️ VM Scheduling Issue Detected + +**Root Cause**: Node taints prevent VM scheduling + +**Details**: +- Found 3 nodes with taint: `virtualization=true:NoSchedule` +- VM spec does not include matching tolerations +- This prevents VM scheduling on virtualization-dedicated nodes +``` + +**Proposed Workaround**: +```bash +oc patch vm web-server -n vms --type=merge -p ' +spec: + template: + spec: + tolerations: + - key: "virtualization" + operator: "Equal" + value: "true" + effect: "NoSchedule" +' +``` + +**User Confirmation Required**: +``` +How would you like to proceed? +- "apply workaround" - I'll patch the VM with tolerations +- "manual" - I'll provide instructions for you to apply manually +- "cancel" - Delete the VM and abort creation +``` + +**After Execution**: +```markdown +## ✓ Workaround Applied Successfully + +**Action**: Added tolerations for taint `virtualization=true:NoSchedule` +**New Status**: Stopped (VM can now be scheduled) + +**Note**: This workaround was needed because the MCP tool doesn't yet support tolerations. +Future VMs in this cluster will need the same fix until the tool is enhanced. 
+``` + +--- + +## VM Status Reference + +### Status Values + +| Status | Meaning | Action Required | +|--------|---------|-----------------| +| `Stopped` / `Halted` | VM created but not running | Normal - use vm-lifecycle-manager to start | +| `Running` | VM is running | Normal | +| `Provisioning` | VM resources being prepared | Wait 5-10 seconds, check again | +| `Starting` | VM is booting | Wait for Running status | +| `Stopping` | VM is shutting down | Wait for Stopped status | +| `Terminating` | VM is being deleted | Wait for deletion to complete | +| `ErrorUnschedulable` | Cannot find node to run VM | **Action needed** - see ErrorUnschedulable section | +| `ErrorDataVolumeNotReady` | Storage not ready | **Action needed** - see ErrorDataVolumeNotReady section | +| `ErrorPvcNotFound` | PVC missing | **Action needed** - see ErrorPvcNotFound section | +| `CrashLoopBackOff` | VM repeatedly crashing | Check VM logs, guest OS issues | + +### Checking VM Status + +```bash +# Get printable status +oc get vm -n -o jsonpath='{.status.printableStatus}' + +# Get detailed status and conditions +oc get vm -n -o jsonpath='{.status}' | jq + +# Watch status changes in real-time +oc get vm -n -w +``` + +--- + +## Best Practices for Agents + +When implementing diagnostic workflows: + +1. **Always verify VM status** after creation (wait 5-10 seconds first) +2. **Consult this document** when encountering error status values +3. **Provide clear diagnosis** with evidence (show events, node taints, resource availability) +4. **Offer multiple solutions** (automated workaround vs manual steps vs alternative approaches) +5. **Respect human-in-the-loop** for all VM modifications +6. **Document temporary workarounds** and their limitations clearly +7. **Suggest filing issues** for missing MCP tool features + +### Document Consultation Pattern + +```markdown +**Document Consultation** (REQUIRED): +1. 
**Action**: Read [troubleshooting.md](../../docs/troubleshooting.md) to understand error causes +2. **Output to user**: "I consulted troubleshooting.md to diagnose the issue." +``` + +--- + +## Known MCP Tool Limitations + +### vm_create tool + +**Currently Supported**: +- ✓ Namespace, name (required) +- ✓ Workload/OS selection (fedora, ubuntu, rhel, etc.) +- ✓ Size hints (small, medium, large) +- ✓ Storage size +- ✓ Autostart flag +- ✓ Networks (Multus NetworkAttachmentDefinitions) +- ✓ Performance family (u1, o1, c1, m1) +- ✓ Instance type, preference + +**Not Currently Supported** (requires workarounds): +- ✗ Tolerations (for node taints) +- ✗ Node selectors +- ✗ Affinity/anti-affinity rules +- ✗ Resource requests/limits (beyond instance type) +- ✗ Custom labels/annotations +- ✗ SSH keys injection +- ✗ Cloud-init user data + +**Workaround Strategy**: Use `oc patch` after VM creation to add missing fields. + +**Enhancement Requests**: File issues at https://github.com/openshift/openshift-mcp-server/issues + +--- + +## Additional Resources + +- [KubeVirt Virtual Machine Status Conditions](https://kubevirt.io/user-guide/virtual_machines/vm_status_conditions/) +- [OpenShift Virtualization Troubleshooting](https://docs.openshift.com/container-platform/latest/virt/support/virt-troubleshooting.html) +- [Kubernetes Scheduling Framework](https://kubernetes.io/docs/concepts/scheduling-eviction/scheduling-framework/) +- [OpenShift MCP Server Issues](https://github.com/openshift/openshift-mcp-server/issues) diff --git a/rh-virt/skills/vm-creator/SKILL.md b/rh-virt/skills/vm-creator/SKILL.md new file mode 100644 index 0000000..08a3a83 --- /dev/null +++ b/rh-virt/skills/vm-creator/SKILL.md @@ -0,0 +1,709 @@ +--- +name: vm-creator +description: | + Create new virtual machines in OpenShift Virtualization with automatic instance type resolution and OS selection. 
Use this skill when users request: + - "Create a new VM" + - "Deploy a virtual machine with [OS]" + - "Set up a VM in namespace [name]" + - "Provision a [size] VM" + + This skill handles VM creation with intelligent defaults for OpenShift Virtualization. + +model: inherit +color: green +--- + +# /vm-creator Skill + +Create virtual machines in OpenShift Virtualization using the `vm_create` tool from the openshift-virtualization MCP server. + +## Critical: Human-in-the-Loop Requirements + +**IMPORTANT:** This skill requires explicit user confirmation before creating VMs. You MUST: + +1. **Wait for user confirmation** on all VM configuration parameters before executing `vm_create` +2. **Do NOT proceed** with VM creation until the user explicitly approves the configuration +3. **Present configuration clearly** in a table format and wait for user response +4. **Never auto-execute** VM creation without user approval - creating a VM is a state-changing operation that consumes cluster resources and persists until the VM is explicitly deleted +5. **Allow modifications** - If user wants to change parameters, update and re-confirm before proceeding + +If the user says "no" or wants modifications, address their concerns before proceeding. 
+ +**Why this matters:** +- VM creation consumes cluster resources (CPU, memory, storage) +- VMs persist until explicitly deleted +- Incorrect configuration can impact cluster performance +- User should verify namespace, sizing, and other parameters + +## Prerequisites + +**Required MCP Server**: `openshift-virtualization` ([OpenShift MCP Server](https://github.com/openshift/openshift-mcp-server)) + +**Required MCP Tools**: +- `vm_create` (from openshift-virtualization) - Create VirtualMachine resources + +**Required Environment Variables**: +- `KUBECONFIG` - Path to Kubernetes configuration file with cluster access + +**Required Cluster Setup**: +- OpenShift cluster (>= 4.19) +- OpenShift Virtualization operator installed +- ServiceAccount with RBAC permissions to create VirtualMachine resources +- Namespace with appropriate permissions + +### Prerequisite Verification + +**Before executing, verify MCP server availability:** + +1. **Check MCP Server Configuration** + - Verify `openshift-virtualization` exists in `.mcp.json` + - If missing → Report to user with setup instructions + +2. **Check Environment Variables** + - Verify `KUBECONFIG` is set (check presence only, never expose value) + - If missing → Report to user + +3. **Verify Cluster Access** (optional quick check) + - Test basic connectivity to cluster + - If fails → Report connection error + +**Human Notification Protocol:** + +When prerequisites fail: + +``` +❌ Cannot execute vm-creator: MCP server 'openshift-virtualization' is not available + +📋 Setup Instructions: +1. Add openshift-virtualization to .mcp.json: + { + "mcpServers": { + "openshift-virtualization": { + "command": "npx", + "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "env": { + "KUBECONFIG": "${KUBECONFIG}" + } + } + } + } +2. Set KUBECONFIG environment variable: + export KUBECONFIG="/path/to/your/kubeconfig" +3. 
Restart Claude Code to reload MCP servers + +🔗 Documentation: https://github.com/openshift/openshift-mcp-server + +❓ How would you like to proceed? +Options: +- "setup" - Help configure the MCP server now +- "skip" - Skip this skill +- "abort" - Stop workflow + +Please respond with your choice. +``` + +⚠️ **SECURITY**: Never display actual KUBECONFIG path or credential values in output. + +## When to Use This Skill + +**Trigger this skill when:** +- User explicitly invokes `/vm-creator` command +- User requests creating a new virtual machine +- Deploying VMs with specific OS (Fedora, Ubuntu, RHEL, CentOS, Debian) +- Setting up VMs with custom sizing (small, medium, large) +- Provisioning VMs with specific storage requirements + +**User phrases that trigger this skill:** +- "Create a Fedora VM in namespace vms" +- "Deploy a medium Ubuntu VM with 100Gi disk" +- "Set up a RHEL VM called database-01" +- "/vm-creator" (explicit command) + +**Do NOT use this skill when:** +- User wants to start/stop existing VMs → Use `/vm-lifecycle-manager` skill instead +- User wants to list VMs → Use `/vm-inventory` skill instead +- User only wants information about VMs (not creation) → Use `/vm-inventory` skill instead + +## Workflow + +### Step 1: Gather VM Requirements and Confirm Configuration + +**Collect information from user**, then present for confirmation before proceeding. + +**Required Parameters:** +1. **VM Name** - Name for the virtual machine + - Example: "web-server", "database-01", "test-vm" + +2. **Namespace** - OpenShift namespace where VM will be created + - Example: "vms", "production", "dev-environment" + +**Optional Parameters (with defaults):** +3. **Operating System** (`workload`) - Default: `"fedora"` + - Supported: `fedora`, `ubuntu`, `centos`, `centos-stream`, `debian`, `rhel`, `opensuse`, `opensuse-tumbleweed`, `opensuse-leap` + - Can also accept full container disk image URLs + +4. 
**Size** (`size`) - VM sizing hint + - Options: `small`, `medium`, `large`, `xlarge` + - If not specified, MCP server uses default instance type + +5. **Storage** (`storage`) - Default: `"30Gi"` + - Root disk size: `"30Gi"`, `"50Gi"`, `"100Gi"`, etc. + +6. **Autostart** (`autostart`) - Default: `false` + - `true`: VM starts automatically after creation + - `false`: VM created in halted state + +**After gathering parameters, present configuration for confirmation:** + +```markdown +## Virtual Machine Configuration + +**Please review and confirm the VM configuration:** + +| Parameter | Value | Notes | +|-----------|-------|-------| +| VM Name | `web-server` | [from user input] | +| Namespace | `vms` | [from user input] | +| Operating System | `fedora` | [default / user specified] | +| Size | `medium` | [user specified / omitted for default] | +| Storage | `50Gi` | [user specified / default: 30Gi] | +| Autostart | `no` | [default / user specified] | + +**This will create a new VirtualMachine resource consuming cluster resources.** + +Confirm these settings or tell me what to change. +- yes - Proceed with VM creation +- no - Cancel operation +- modify - Change specific parameters +``` + +**WAIT for user confirmation before proceeding to Step 2.** Do NOT continue until user explicitly confirms with "yes" or provides modifications. + +### Step 2: Create the Virtual Machine + +**ONLY PROCEED AFTER USER CONFIRMATION IN STEP 1.** + +**Use the openshift-virtualization MCP tool:** + +Call `vm_create` with the confirmed parameters from Step 1. 
+ +**MCP Tool**: `vm_create` (from openshift-virtualization) + +**Parameters** (based on user input from Step 1): + +```json +{ + "namespace": "<namespace>", // REQUIRED + "name": "<vm-name>", // REQUIRED + "workload": "<os-name>", // OPTIONAL (default: "fedora") + "size": "<size>", // OPTIONAL + "storage": "<storage-size>", // OPTIONAL (default: "30Gi") + "autostart": <true|false> // OPTIONAL (default: false) +} +``` + +**Example tool invocation:** +```json +vm_create({ + "namespace": "vms", + "name": "web-server", + "workload": "fedora", + "size": "medium", + "storage": "50Gi", + "autostart": false +}) +``` + +**Expected Output:** +- Success: VirtualMachine resource created +- Failure: Error message with reason (permissions, resources, etc.) + +**Error Handling:** +- If namespace doesn't exist: Report error, suggest creating namespace first +- If insufficient resources: Report error with resource requirements +- If permission denied: Report RBAC error, suggest checking ServiceAccount permissions + +### Step 3: Verify VM Status and Diagnose Issues + +**CRITICAL**: After VM creation, verify it can be scheduled successfully. + +**Verification Steps**: + +1. **Wait 5-10 seconds** for initial scheduling attempt + +2. **Check VM status** using Kubernetes API: + +**MCP Tool**: `resources_get` (from openshift-virtualization) + +**Parameters**: +- `apiVersion`: "kubevirt.io/v1" +- `kind`: "VirtualMachine" +- `name`: "<vm-name>" (from Step 1) +- `namespace`: "<namespace>" (from Step 1) + +Extract `status.printableStatus` from the response. + +**Status Interpretation**: +- `Stopped` / `Halted` → Normal (VM created successfully, not started) +- `Running` → Normal (if autostart=true) +- `Provisioning` → Wait 5 seconds and check again +- `ErrorUnschedulable` → **Proceed to diagnostic workflow below** +- `ErrorDataVolumeNotReady` → Storage issue, proceed to diagnostic workflow + +**Diagnostic Workflow (when ErrorUnschedulable detected)**: + +#### 3a. Consult Troubleshooting Documentation + +**Document Consultation** (REQUIRED): +1. 
**Action**: Read [troubleshooting.md](../../docs/troubleshooting.md) using the Read tool to understand ErrorUnschedulable causes +2. **Output to user**: "I detected the VM is ErrorUnschedulable. I consulted [troubleshooting.md](../../docs/troubleshooting.md) to diagnose the issue." + +#### 3b. Gather Diagnostic Information + +**Execute diagnostic commands** using MCP tools or bash: + +```bash +# Get VM events to see scheduling failures +oc describe vm <vm-name> -n <namespace> | grep -A 10 "Events:" + +# Check node taints +oc get nodes -o json | jq '.items[] | select(.spec.taints != null) | {name: .metadata.name, taints: .spec.taints}' +``` + +**Parse results** to identify root cause: +- Events contain "taints that the pod didn't tolerate" → **Taints/Tolerations issue** +- Events contain "Insufficient cpu" or "Insufficient memory" → **Resource constraints** +- Events contain "no nodes available" → **No suitable nodes** + +#### 3c. Present Diagnosis to User + +**Report findings in clear format**: + +````markdown +## ⚠️ VM Scheduling Issue Detected + +**VM Name**: `<vm-name>` +**Namespace**: `<namespace>` +**Status**: ErrorUnschedulable + +### Diagnosis + +**Root Cause**: Node taints prevent VM scheduling + +**Details**: +- Found X nodes with taint: `virtualization=true:NoSchedule` +- VM spec does not include matching tolerations +- This prevents the VM from being scheduled on virtualization-dedicated nodes + +**Affected Nodes**: +- <node-name> (taint: <taint-key>=<taint-value>:<taint-effect>) +- <node-name> (taint: <taint-key>=<taint-value>:<taint-effect>) + +### Recommended Solution + +I can apply a workaround to add the required tolerations to your VM. 
+ +**Workaround**: Patch the VirtualMachine to add tolerations + +**Command that will be executed**: +```bash +oc patch vm <vm-name> -n <namespace> --type=merge -p ' +spec: + template: + spec: + tolerations: + - key: "<taint-key>" + operator: "Equal" + value: "<taint-value>" + effect: "<taint-effect>" +' +``` + +**Impact**: +- ✓ VM will be able to schedule on virtualization nodes +- ✓ VM configuration will be updated +- ⚠️ This is a manual workaround (MCP tool doesn't support tolerations yet) + +**Alternative Options**: +1. Accept workaround and I'll apply the patch now +2. Manually apply the patch yourself +3. Contact cluster admin to remove node taints +4. Cancel and delete the VM + +⚠️ **This is a temporary limitation**: The openshift-virtualization MCP server doesn't yet support a tolerations parameter in vm_create. Consider filing an issue at: https://github.com/openshift/openshift-mcp-server/issues +```` + +#### 3d. Wait for User Decision (Human-in-the-Loop) + +**CRITICAL**: Do NOT execute the workaround without explicit user confirmation. + +**Prompt user**: +``` +How would you like to proceed? +- "apply workaround" - I'll patch the VM with tolerations +- "manual" - I'll provide instructions for you to apply manually +- "cancel" - Delete the VM and abort creation +- "ignore" - Keep the VM as-is (will remain unschedulable) + +Please respond with your choice. +``` + +#### 3e. 
Execute Workaround (ONLY if user confirms) + +**ONLY if user responds with "apply workaround" or similar confirmation:** + +**Execute patch command**: +```bash +oc patch vm <vm-name> -n <namespace> --type=merge -p ' +spec: + template: + spec: + tolerations: + - key: "<taint-key>" + operator: "Equal" + value: "<taint-value>" + effect: "<taint-effect>" +' +``` + +**Verify patch success**: +```bash +# Check if tolerations were added +oc get vm <vm-name> -n <namespace> -o jsonpath='{.spec.template.spec.tolerations}' + +# Wait 5 seconds and check status again +sleep 5 +oc get vm <vm-name> -n <namespace> -o jsonpath='{.status.printableStatus}' +``` + +**Report result**: +````markdown +## ✓ Workaround Applied Successfully + +**VM Name**: `<vm-name>` +**Namespace**: `<namespace>` +**Action**: Added tolerations for taint `<taint-key>=<taint-value>:<taint-effect>` + +**New Status**: <new-status> (VM can now be scheduled) + +**Next Steps**: +To start the VM: +``` +"Start VM <vm-name> in namespace <namespace>" +``` + +**Note**: This workaround was needed because the MCP tool doesn't yet support tolerations. Future VMs in this cluster will need the same fix until the tool is enhanced. +```` + +### Step 4: Report Creation Status + +**Present results to user in a clear, actionable format:** + +**On Success:** + +````markdown +## ✓ Virtual Machine Created Successfully + +**VM Details:** +- **Name**: `web-server` +- **Namespace**: `vms` +- **Operating System**: Fedora +- **Size**: medium +- **Storage**: 50Gi +- **Status**: Halted (VM is created but not running) + +**Next Steps:** + +To start the VM: +``` +"Start VM web-server in namespace vms" +``` + +To check VM status: +``` +"Show status of VM web-server" +``` + +To view full VM details: +``` +"Get details of web-server VM" +``` + +**Note**: The VM is created in a halted state. Use the `/vm-lifecycle-manager` skill to start it. 
+```` + +**On Failure:** + +```markdown +## ❌ Failed to Create Virtual Machine + +**Error**: <error-message-from-tool> + +**Common Causes:** +- **Namespace doesn't exist** - Create the namespace first: `oc create namespace <namespace>` +- **Insufficient RBAC permissions** - ServiceAccount lacks permission to create VirtualMachines +- **Cluster resource constraints** - Insufficient CPU, memory, or storage available +- **Invalid parameter values** - Check OS name, size specification, or storage format +- **OpenShift Virtualization not installed** - Operator must be installed on cluster + +**Troubleshooting:** +1. Verify namespace exists: `oc get namespace <namespace>` +2. Check permissions: `oc auth can-i create virtualmachines -n <namespace>` +3. View cluster capacity: `oc describe nodes` +4. Verify operator installed: `oc get csv -n openshift-cnv` + +Would you like help troubleshooting this error? +``` + +## Advanced Usage + +### Custom Container Disk Images + +Users can provide full container image URLs for the `workload` parameter: + +``` +User: "Create a VM using quay.io/containerdisks/fedora:latest" + +vm_create({ + "namespace": "vms", + "name": "custom-vm", + "workload": "quay.io/containerdisks/fedora:latest" +}) +``` + +### Secondary Networks + +Attach additional networks using the `networks` parameter (requires NetworkAttachmentDefinition resources): + +```json +vm_create({ + "namespace": "vms", + "name": "network-vm", + "workload": "fedora", + "networks": ["vlan-network", "storage-network"] +}) +``` + +Or with custom interface names: + +```json +vm_create({ + "namespace": "vms", + "name": "network-vm", + "workload": "fedora", + "networks": [ + {"name": "eth1", "networkName": "vlan-network"} + ] +}) +``` + +### Performance Tuning + +Use the `performance` parameter to select instance type family: + +```json +vm_create({ + "namespace": "vms", + "name": "compute-vm", + "workload": "fedora", + "performance": "c1", // compute-optimized + "size": "large" +}) +``` + +Options: +- `"u1"` - general-purpose (default) +- `"o1"` 
- overcommitted +- `"c1"` - compute-optimized +- `"m1"` - memory-optimized + +### Explicit Instance Type + +Specify an exact instance type instead of using `size`: + +```json +vm_create({ + "namespace": "vms", + "name": "precise-vm", + "workload": "fedora", + "instancetype": "u1.large" +}) +``` + +## Common Issues + +### Issue 1: Namespace Not Found + +**Error**: "Namespace 'xyz' not found" + +**Solution:** +1. List available namespaces: Suggest using `oc get namespaces` or `kubectl get ns` +2. Create namespace if needed: `oc create namespace <namespace>` +3. Verify ServiceAccount has access to the namespace + +### Issue 2: Insufficient Permissions + +**Error**: "Forbidden: User cannot create VirtualMachines in namespace 'xyz'" + +**Solution:** +- Verify KUBECONFIG has appropriate RBAC permissions +- Required permissions: create VirtualMachine resources +- Contact cluster admin for permission grant +- Check ServiceAccount role bindings + +### Issue 3: Resource Constraints + +**Error**: "Insufficient resources to schedule VM" + +**Solution:** +1. Check cluster capacity: CPU, memory available +2. Try smaller `size` (e.g., change "large" to "medium") +3. Reduce `storage` size if possible +4. 
Contact cluster admin to scale nodes + +### Issue 4: OpenShift Virtualization Not Installed + +**Error**: "VirtualMachine CRD not found" + +**Solution:** +- OpenShift Virtualization operator must be installed +- Verify: `oc get csv -n openshift-cnv` +- Contact cluster admin to install operator + +## Dependencies + +### Required MCP Servers +- `openshift-virtualization` - OpenShift MCP server with KubeVirt toolset + +### Required MCP Tools +- `vm_create` (from openshift-virtualization) - Create VirtualMachine resources + +### Related Skills +- `vm-lifecycle-manager` - Start VMs after creation +- `vm-inventory` - List and view created VMs + +### Reference Documentation +- [troubleshooting.md](../../docs/troubleshooting.md) - VM error diagnosis and workarounds +- [OpenShift Virtualization Documentation](https://docs.openshift.com/container-platform/latest/virt/about_virt/about-virt.html) +- [KubeVirt VirtualMachine API](https://kubevirt.io/api-reference/) +- [OpenShift MCP Server](https://github.com/openshift/openshift-mcp-server) + +## Security Considerations + +- VMs are created in user-specified namespace (namespace isolation) +- Storage uses cluster-configured storage classes +- Network access controlled by NetworkPolicies +- KUBECONFIG credentials never exposed in output +- All operations audited in Kubernetes API logs +- ServiceAccount permissions limit VM creation scope + +## Example Usage + +**Example 1: Simple VM creation with defaults** + +``` +User: "Create a Fedora VM called web-01 in namespace apps" + +Agent: [Invokes /vm-creator skill] + +## Virtual Machine Configuration + +Please review and confirm the VM configuration: + +| Parameter | Value | Notes | +|-----------|-------|-------| +| VM Name | `web-01` | from user input | +| Namespace | `apps` | from user input | +| Operating System | `fedora` | default | +| Size | (default) | MCP server will select | +| Storage | `30Gi` | default | +| Autostart | `no` | default | + +This will create a new 
VirtualMachine resource consuming cluster resources. + +Confirm these settings or tell me what to change. + +User: "yes" + +Agent: [Calls vm_create with namespace="apps", name="web-01", workload="fedora"] + +## ✓ Virtual Machine Created Successfully + +VM Details: +- Name: `web-01` +- Namespace: `apps` +- OS: Fedora +- Status: Halted + +Next Steps: "Start VM web-01 in namespace apps" +``` + +**Example 2: Custom configuration with modifications** + +``` +User: "Deploy a large RHEL VM with 100Gi storage called db-server in production namespace" + +Agent: [Invokes /vm-creator skill] + +## Virtual Machine Configuration + +| Parameter | Value | Notes | +|-----------|-------|-------| +| VM Name | `db-server` | from user input | +| Namespace | `production` | from user input | +| Operating System | `rhel` | from user input | +| Size | `large` | from user input | +| Storage | `100Gi` | from user input | +| Autostart | `no` | default | + +Confirm these settings or tell me what to change. + +User: "Actually, autostart it" + +Agent: [Updates configuration] + +## Updated Configuration + +| Parameter | Value | Notes | +|-----------|-------|-------| +| Autostart | `yes` | updated per user request | + +Confirm? + +User: "yes" + +Agent: [Calls vm_create with all parameters including autostart=true] + +## ✓ Virtual Machine Created and Started + +VM Details: +- Name: `db-server` +- Namespace: `production` +- OS: RHEL +- Size: large +- Storage: 100Gi +- Status: Running (autostarted) +``` + +**Example 3: User cancels after reviewing configuration** + +``` +User: "Create a VM named test in namespace dev" + +Agent: [Invokes /vm-creator skill] + +## Virtual Machine Configuration + +| Parameter | Value | Notes | +|-----------|-------|-------| +| VM Name | `test` | from user input | +| Namespace | `dev` | from user input | +| Operating System | `fedora` | default | +| Storage | `30Gi` | default | + +Confirm these settings or tell me what to change. 
+ +User: "no, cancel" + +Agent: VM creation cancelled. No resources were created. +``` diff --git a/rh-virt/skills/vm-inventory/SKILL.md b/rh-virt/skills/vm-inventory/SKILL.md new file mode 100644 index 0000000..46ab0c2 --- /dev/null +++ b/rh-virt/skills/vm-inventory/SKILL.md @@ -0,0 +1,601 @@ +--- +name: vm-inventory +description: | + List and view virtual machines across namespaces with status, resource usage, and health information. Use this skill when users request: + - "List all VMs" + - "Show VMs in namespace [name]" + - "What VMs are running?" + - "Get details of VM [name]" + + This skill provides comprehensive VM inventory and status reporting. + +model: inherit +color: cyan +--- + +# /vm-inventory Skill + +List and inspect virtual machines in OpenShift Virtualization clusters. This skill provides read-only access to VM information without making any modifications. + +## Prerequisites + +**Required MCP Server**: `openshift-virtualization` ([OpenShift MCP Server](https://github.com/openshift/openshift-mcp-server)) + +**Required MCP Tools**: +- Kubernetes API access for VirtualMachine resources (via MCP server) +- Standard Kubernetes resource listing and retrieval capabilities + +**Required Environment Variables**: +- `KUBECONFIG` - Path to Kubernetes configuration file with cluster access + +**Required Cluster Setup**: +- OpenShift cluster (>= 4.19) +- OpenShift Virtualization operator installed +- ServiceAccount with RBAC permissions to list and get VirtualMachine resources + +### Prerequisite Verification + +**Before executing, verify MCP server availability:** + +1. **Check MCP Server Configuration** + - Verify `openshift-virtualization` exists in `.mcp.json` + - If missing → Report to user with setup instructions + +2. **Check Environment Variables** + - Verify `KUBECONFIG` is set (check presence only, never expose value) + - If missing → Report to user + +3. 
**Verify Cluster Access** (optional quick check) + - Test basic connectivity to cluster + - If fails → Report connection error + +**Human Notification Protocol:** + +When prerequisites fail: + +``` +❌ Cannot execute vm-inventory: MCP server 'openshift-virtualization' is not available + +📋 Setup Instructions: +1. Add openshift-virtualization to .mcp.json: + { + "mcpServers": { + "openshift-virtualization": { + "command": "npx", + "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "env": { + "KUBECONFIG": "${KUBECONFIG}" + } + } + } + } +2. Set KUBECONFIG environment variable: + export KUBECONFIG="/path/to/your/kubeconfig" +3. Restart Claude Code to reload MCP servers + +🔗 Documentation: https://github.com/openshift/openshift-mcp-server + +❓ How would you like to proceed? +Options: +- "setup" - Help configure the MCP server now +- "skip" - Skip this skill +- "abort" - Stop workflow + +Please respond with your choice. +``` + +⚠️ **SECURITY**: Never display actual KUBECONFIG path or credential values in output. + +## When to Use This Skill + +**Trigger this skill when:** +- User explicitly invokes `/vm-inventory` command +- User wants to see all VMs or VMs in a specific namespace +- User asks about VM status or health +- User needs to find a VM by name +- User wants details about a specific VM configuration + +**User phrases that trigger this skill:** +- "List all VMs" +- "Show VMs in production namespace" +- "What VMs are running?" 
+- "Get details of VM web-server" +- "Show me the status of database-vm" +- "/vm-inventory" (explicit command) + +**Do NOT use this skill when:** +- User wants to create a VM → Use `/vm-creator` skill instead +- User wants to start/stop VMs → Use `/vm-lifecycle-manager` skill instead +- User wants to modify VM configuration → Different operation (not inventory) + +## Workflow + +### Workflow A: List All VMs (Across All Namespaces) + +**Step 1: Query VirtualMachine Resources** + +Use Kubernetes API to list all VirtualMachine resources across namespaces. + +**Implementation approach:** +``` +oc get virtualmachines -A +# or +kubectl get vms -A +``` + +**Via MCP**: Use standard Kubernetes resource listing with: +- `apiVersion`: `kubevirt.io/v1` +- `kind`: `VirtualMachine` +- No namespace specified (all namespaces) + +**Step 2: Format and Display Results** + +Present VMs organized by namespace with key information: + +```markdown +## 📋 Virtual Machines (All Namespaces) + +### Namespace: production +- ✓ **web-server-01** - Running (4 vCPU, 8Gi RAM) +- ✓ **web-server-02** - Running (4 vCPU, 8Gi RAM) +- ✗ **database-vm** - Stopped (8 vCPU, 16Gi RAM) + +### Namespace: development +- ✓ **test-vm** - Running (2 vCPU, 4Gi RAM) +- ⚠ **debug-vm** - Pending (2 vCPU, 4Gi RAM) + +### Summary: +- **Total VMs**: 5 +- **Running**: 3 +- **Stopped**: 1 +- **Pending**: 1 +``` + +**Status Indicators:** +- ✓ Running/Ready +- ✗ Stopped/Halted +- ⚠ Pending/Starting/Terminating +- ❌ Failed/Error + +### Workflow B: List VMs in Specific Namespace + +**Step 1: Gather Namespace** + +Ask user for namespace if not provided in the request. + +**Step 2: Query VMs in Namespace** + +Use Kubernetes API to list VirtualMachines in the specified namespace. 
+ +**Implementation:** +``` +oc get virtualmachines -n <namespace> +# or +kubectl get vms -n <namespace> +``` + +**Step 3: Display Namespace-Specific Results** + +```markdown +## 📋 Virtual Machines in 'production' + +| Name | Status | vCPU | Memory | Age | Node | +|------|--------|------|--------|-----|------| +| web-server-01 | Running | 4 | 8Gi | 15d | worker-01 | +| web-server-02 | Running | 4 | 8Gi | 15d | worker-02 | +| database-vm | Stopped | 8 | 16Gi | 30d | - | + +**Summary**: 3 VMs (2 running, 1 stopped) +``` + +### Workflow C: Get Details of a Specific VM + +**Step 1: Gather VM Information** + +Required: +- VM name +- Namespace (ask if not provided) + +**Step 2: Retrieve VM Resource Details** + +Use the Kubernetes API to get the specific VirtualMachine resource. + +**Implementation:** +``` +oc get virtualmachine <vm-name> -n <namespace> -o yaml +# or +kubectl get vm <vm-name> -n <namespace> -o yaml +``` + +**Step 3: Display Detailed Information** + +```markdown +## 🖥️ Virtual Machine Details + +### Basic Information +- **Name**: `web-server-01` +- **Namespace**: `production` +- **Status**: Running +- **Created**: 15 days ago + +### Configuration +- **Instance Type**: u1.medium +- **Workload**: Fedora +- **Run Strategy**: Always (auto-restart on crash) + +### Resources +- **vCPU**: 4 cores +- **Memory**: 8Gi +- **Storage**: 50Gi +- **Storage Class**: ocs-storagecluster-ceph-rbd + +### Network +- **Primary**: default (pod network) +- **Secondary**: vlan100 (multus - 192.168.100.5) + +### Volumes +- **rootdisk**: 50Gi (DataVolume/PVC) + +### Current State +- **Phase**: Running +- **Ready**: True +- **Node**: worker-01 +- **Pod IP**: 10.129.2.45 +- **Guest OS Uptime**: 12 days + +### Conditions +- ✓ Ready +- ✓ LiveMigratable +- ✓ AgentConnected + +### Labels +- app: web +- env: production +- tier: frontend +``` + +### Workflow D: Filter VMs by Criteria + +**Filtering options:** + +1. **By Status**: + - Running VMs only + - Stopped VMs only + - VMs in error state + +2. 
**By Labels**: + ``` + User: "Show me all VMs with label app=web" + + Filter: -l app=web + ``` + +3. **By Resource Size**: + - Large VMs (> 8 vCPU) + - Small VMs (< 4 vCPU) + +**Display filtered results with explanation:** +```markdown +## 📋 VMs with label 'app=web' + +Found 3 VMs: +- web-server-01 (production) - Running +- web-server-02 (production) - Running +- web-dev-01 (development) - Running +``` + +## Common Queries and Responses + +### "Show me all running VMs" + +```markdown +## ✓ Running Virtual Machines + +### production +- web-server-01 (4 vCPU, 8Gi RAM, worker-01) +- web-server-02 (4 vCPU, 8Gi RAM, worker-02) + +### development +- test-vm (2 vCPU, 4Gi RAM, worker-03) + +**Total**: 3 running VMs +``` + +### "Which VMs are in production namespace?" + +```markdown +## 📋 VMs in 'production' namespace + +1. **web-server-01** - Running + - Resources: 4 vCPU, 8Gi RAM + - Age: 15 days + +2. **web-server-02** - Running + - Resources: 4 vCPU, 8Gi RAM + - Age: 15 days + +3. **database-vm** - Stopped + - Resources: 8 vCPU, 16Gi RAM + - Age: 30 days +``` + +### "Get status of VM web-server-01" + +```markdown +## Status: web-server-01 + +- **Namespace**: production +- **Status**: ✓ Running +- **Health**: Healthy +- **Uptime**: 12 days +- **Node**: worker-01 +- **IP**: 10.129.2.45 + +All systems operational. +``` + +### "Show me VMs that are stopped" + +````markdown +## ✗ Stopped Virtual Machines + +### production +- database-vm (8 vCPU, 16Gi RAM) + - Stopped 5 days ago + - Reason: Manual shutdown + +### development +- old-test-vm (2 vCPU, 4Gi RAM) + - Stopped 20 days ago + +**Total**: 2 stopped VMs + +To start a VM, use: +``` +"Start VM <vm-name> in namespace <namespace>" +``` +```` + +## Health Dashboard + +When the user asks for overall health, provide a summary: + +```markdown +## 🏥 VM Health Summary + +### ✓ Healthy (8 VMs) +All VMs running as expected with no issues. 
+ +### ⚠️ Warning (2 VMs) +- **test-vm** (development) + - High memory usage (95%) + - Recommendation: Monitor or increase memory + +- **staging-db** (staging) + - Pod restart count: 5 + - Recommendation: Check application logs + +### ❌ Critical (1 VM) +- **broken-vm** (development) + - Status: CrashLoopBackOff + - Recommendation: Use /vm-troubleshooter skill to diagnose + +### Summary +- **Total**: 11 VMs +- **Healthy**: 73% +- **Need Attention**: 27% + +**Recommendations:** +1. Investigate test-vm memory usage +2. Check staging-db logs using vm-troubleshooter +3. Fix broken-vm configuration +``` + +## Output Formatting Guidelines + +**Use consistent status indicators:** +- ✓ Running/Healthy/Ready +- ✗ Stopped/Halted +- ⚠ Warning/Pending/Migrating +- ❌ Critical/Failed/Error + +**Include key information always:** +- VM name and namespace +- Current status +- Resource allocation (vCPU, memory) +- Age/creation time +- Node placement (for running VMs) + +**Organize by namespace** when showing multiple VMs: +- Groups VMs logically +- Easier to scan +- Clear separation + +**Provide actionable next steps:** +- How to start stopped VMs +- How to get more details +- When to use other skills (troubleshooter, lifecycle-manager) + +## Common Issues + +### Issue 1: No VMs Found + +**Result**: Empty list when querying VMs + +**Possible Causes:** +1. No VMs exist in the cluster/namespace +2. Wrong namespace specified +3. Insufficient RBAC permissions to list VMs + +**Response:** +```markdown +## No Virtual Machines Found + +**Namespace**: production + +No VMs were found in this namespace. 
+ +**Possible reasons:** +- No VMs have been created yet +- VMs may exist in a different namespace +- Insufficient permissions to view VMs + +**Next steps:** +- Create a VM: Use /vm-creator skill +- List all namespaces: "Show me all namespaces" +- Check permissions: `oc auth can-i list virtualmachines -n production` +``` + +### Issue 2: Permission Denied + +**Error**: "Forbidden: User cannot list VirtualMachines" + +**Solution:** +- Verify KUBECONFIG has appropriate RBAC permissions +- Required permissions: list/get VirtualMachine resources +- Contact cluster admin for permission grant +- Check ServiceAccount role bindings + +### Issue 3: Cluster Connection Error + +**Error**: "Unable to connect to cluster" + +**Solution:** +1. Verify KUBECONFIG is set and valid +2. Check cluster is accessible: `oc cluster-info` +3. Verify network connectivity +4. Check if cluster credentials are expired + +## Integration with Other Skills + +**Before creating a VM** (vm-creator): +- Use vm-inventory to check if VM name already exists +- Verify namespace exists and has capacity + +**Before lifecycle operations** (vm-lifecycle-manager): +- Check current VM status using vm-inventory +- Verify VM exists before attempting start/stop/restart + +**For troubleshooting**: +- Get VM overview with vm-inventory first +- Then use vm-troubleshooter (if available) for deep diagnostics + +## Dependencies + +### Required MCP Servers +- `openshift-virtualization` - OpenShift MCP server with KubeVirt toolset + +### Required MCP Tools +- Kubernetes API access for VirtualMachine resources +- Standard resource listing and retrieval capabilities + +### Related Skills +- `vm-creator` - Create VMs after checking inventory +- `vm-lifecycle-manager` - Manage VMs discovered in inventory +- `vm-troubleshooter` (planned) - Diagnose problematic VMs from inventory + +### Reference Documentation +- [OpenShift Virtualization 
Documentation](https://docs.openshift.com/container-platform/latest/virt/about_virt/about-virt.html) +- [KubeVirt VirtualMachine API](https://kubevirt.io/api-reference/) +- [Accessing VMs](https://docs.openshift.com/container-platform/latest/virt/virtual_machines/virt-accessing-vm-consoles.html) +- [VM Status Conditions](https://kubevirt.io/user-guide/virtual_machines/vm_status_conditions/) + +## Security Considerations + +- Read-only operations - no modifications to VMs +- Respects Kubernetes RBAC permissions +- Only shows VMs in namespaces user has access to +- KUBECONFIG credentials never exposed in output +- No sensitive VM configuration details displayed by default +- All queries audited in Kubernetes API logs + +## Example Usage + +**Example 1: List all VMs** + +``` +User: "List all VMs" + +Agent: [Invokes /vm-inventory skill] + [Queries all VirtualMachine resources across namespaces] + +## 📋 Virtual Machines (All Namespaces) + +Namespace: production +- ✓ web-server-01 - Running (4 vCPU, 8Gi) +- ✓ web-server-02 - Running (4 vCPU, 8Gi) +- ✗ database-vm - Stopped (8 vCPU, 16Gi) + +Namespace: development +- ✓ test-vm - Running (2 vCPU, 4Gi) + +Summary: 4 VMs (3 running, 1 stopped) +``` + +**Example 2: Get details of specific VM** + +``` +User: "Show me details of web-server-01 in production" + +Agent: [Invokes /vm-inventory skill] + [Retrieves specific VirtualMachine resource] + +## 🖥️ Virtual Machine Details + +Name: web-server-01 +Namespace: production +Status: ✓ Running + +Configuration: +- vCPU: 4 cores +- Memory: 8Gi +- Storage: 50Gi +- OS: Fedora + +Network: +- Pod IP: 10.129.2.45 +- Node: worker-01 + +Health: All systems operational +Uptime: 12 days +``` + +**Example 3: Filter by namespace** + +``` +User: "What VMs are in the development namespace?" 
+ +Agent: [Invokes /vm-inventory skill] + [Queries VMs in development namespace] + +## 📋 VMs in 'development' namespace + +| Name | Status | Resources | Age | +|------|--------|-----------|-----| +| test-vm | Running | 2 vCPU, 4Gi | 5d | +| debug-vm | Stopped | 2 vCPU, 4Gi | 10d | + +Summary: 2 VMs (1 running, 1 stopped) +``` + +**Example 4: No VMs found** + +``` +User: "List VMs in namespace test" + +Agent: [Invokes /vm-inventory skill] + [Queries VMs in test namespace] + +## No Virtual Machines Found + +Namespace: test + +No VMs exist in this namespace. + +To create a VM: +``` +"Create a VM in namespace test" +``` + +The /vm-creator skill will help you set up a new virtual machine. +``` diff --git a/rh-virt/skills/vm-lifecycle-manager/SKILL.md b/rh-virt/skills/vm-lifecycle-manager/SKILL.md new file mode 100644 index 0000000..0e1f756 --- /dev/null +++ b/rh-virt/skills/vm-lifecycle-manager/SKILL.md @@ -0,0 +1,586 @@ +--- +name: vm-lifecycle-manager +description: | + Manage virtual machine lifecycle operations including start, stop, and restart. Use this skill when users request: + - "Start VM [name]" + - "Stop the virtual machine [name]" + - "Restart VM [name]" + - "Power on/off VM [name]" + + This skill handles VM state transitions safely with user confirmation for each action. + +model: inherit +color: blue +--- + +# /vm-lifecycle-manager Skill + +Control virtual machine power state and lifecycle operations in OpenShift Virtualization using the `vm_lifecycle` tool from the openshift-virtualization MCP server. + +## Critical: Human-in-the-Loop Requirements + +**IMPORTANT:** This skill requires explicit user confirmation before executing lifecycle actions. You MUST: + +1. **Wait for user confirmation** on the specific action (start/stop/restart) before executing `vm_lifecycle` +2. **Do NOT proceed** with the lifecycle operation until the user explicitly approves +3. **Present the action clearly** with VM name, namespace, and intended state change +4. 
**Never auto-execute** VM lifecycle changes without user approval - these operations impact running services +5. **Warn about consequences** - stopping VMs interrupts services, starting consumes resources, restarting causes brief downtime + +If the user says "no" or wants to reconsider, do not proceed with the operation. + +**Why this matters:** +- **Start**: Consumes cluster resources (CPU, memory), affects resource availability for other VMs +- **Stop**: Interrupts running services and applications, may cause service downtime +- **Restart**: Causes brief service interruption, running processes will be terminated +- User should verify they're targeting the correct VM and understand the impact + +## Prerequisites + +**Required MCP Server**: `openshift-virtualization` ([OpenShift MCP Server](https://github.com/openshift/openshift-mcp-server)) + +**Required MCP Tools**: +- `vm_lifecycle` (from openshift-virtualization) - Manage VM power state + +**Required Environment Variables**: +- `KUBECONFIG` - Path to Kubernetes configuration file with cluster access + +**Required Cluster Setup**: +- OpenShift cluster (>= 4.19) +- OpenShift Virtualization operator installed +- ServiceAccount with RBAC permissions to update VirtualMachine resources +- Target VM must already exist + +### Prerequisite Verification + +**Before executing, verify MCP server availability:** + +1. **Check MCP Server Configuration** + - Verify `openshift-virtualization` exists in `.mcp.json` + - If missing → Report to user with setup instructions + +2. **Check Environment Variables** + - Verify `KUBECONFIG` is set (check presence only, never expose value) + - If missing → Report to user + +3. 
**Verify VM Exists** (optional quick check) + - Confirm target VM exists in specified namespace + - If VM not found → Report error to user + +**Human Notification Protocol:** + +When prerequisites fail: + +``` +❌ Cannot execute vm-lifecycle-manager: MCP server 'openshift-virtualization' is not available + +📋 Setup Instructions: +1. Add openshift-virtualization to .mcp.json: + { + "mcpServers": { + "openshift-virtualization": { + "command": "npx", + "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "env": { + "KUBECONFIG": "${KUBECONFIG}" + } + } + } + } +2. Set KUBECONFIG environment variable: + export KUBECONFIG="/path/to/your/kubeconfig" +3. Restart Claude Code to reload MCP servers + +🔗 Documentation: https://github.com/openshift/openshift-mcp-server + +❓ How would you like to proceed? +Options: +- "setup" - Help configure the MCP server now +- "skip" - Skip this skill +- "abort" - Stop workflow + +Please respond with your choice. +``` + +⚠️ **SECURITY**: Never display actual KUBECONFIG path or credential values in output. + +## When to Use This Skill + +**Trigger this skill when:** +- User explicitly invokes `/vm-lifecycle-manager` command +- User requests starting a stopped/halted VM +- User requests stopping a running VM +- User requests restarting a VM (stop + start) +- User wants to change VM power state + +**User phrases that trigger this skill:** +- "Start VM web-server in namespace vms" +- "Stop the database VM" +- "Restart test-vm" +- "Power on the VM called api-server" +- "/vm-lifecycle-manager" (explicit command) + +**Do NOT use this skill when:** +- User wants to create a new VM → Use `/vm-creator` skill instead +- User wants to list VMs → Use `/vm-inventory` skill instead +- User wants to delete a VM → Different operation (not lifecycle management) + +## Workflow + +### Step 1: Gather Lifecycle Operation Details and Confirm + +**Collect information from user**, then present for confirmation before proceeding. 
+ +**Required Parameters:** +1. **VM Name** - Name of the virtual machine + - Example: "web-server", "database-01", "test-vm" + +2. **Namespace** - OpenShift namespace where VM exists + - Example: "vms", "production", "dev-environment" + +3. **Action** - Lifecycle operation to perform + - `start` - Power on the VM (sets runStrategy: Always) + - `stop` - Power off the VM (sets runStrategy: Halted) + - `restart` - Stop then start the VM (brief service interruption) + +**After gathering parameters, present operation for confirmation:** + +```markdown +## VM Lifecycle Operation + +**Please review and confirm the lifecycle operation:** + +| Parameter | Value | Impact | +|-----------|-------|--------| +| VM Name | `web-server` | [from user input] | +| Namespace | `vms` | [from user input] | +| Action | `start` | VM will power on and consume cluster resources | +| Current Status | `Stopped` | [if known] | +| New Status | `Running` | After successful start | + +**Impact of this operation:** +- **start**: VM will consume CPU and memory resources, applications will start +- **stop**: VM will shut down gracefully, running processes will terminate +- **restart**: VM will stop and start again, causing brief service interruption (~1-2 minutes) + +**This operation will change the VM power state.** + +Confirm this action or tell me what to change. +- yes - Proceed with lifecycle operation +- no - Cancel operation +- modify - Change parameters +``` + +**WAIT for user confirmation before proceeding to Step 2.** Do NOT continue until user explicitly confirms with "yes". + +### Step 2: Execute the Lifecycle Operation + +**ONLY PROCEED AFTER USER CONFIRMATION IN STEP 1.** + +**Use the openshift-virtualization MCP tool:** + +Call `vm_lifecycle` with the confirmed parameters from Step 1. 
+ +**MCP Tool**: `vm_lifecycle` (from openshift-virtualization) + +**Parameters** (based on user input from Step 1): + +```json +{ + "namespace": "<namespace>", // REQUIRED + "name": "<vm-name>", // REQUIRED + "action": "<start|stop|restart>" // REQUIRED +} +``` + +**Example tool invocation:** +```json +vm_lifecycle({ + "namespace": "vms", + "name": "web-server", + "action": "start" +}) +``` + +**Expected Output:** +- Success: VirtualMachine runStrategy updated, state transition initiated +- Failure: Error message with reason (VM not found, permissions, already in desired state, etc.) + +**Error Handling:** +- If VM not found: Report error, suggest checking VM name and namespace +- If permission denied: Report RBAC error, suggest checking ServiceAccount permissions +- If VM already in desired state: Inform user (e.g., "VM is already running") +- If state transition fails: Report error with details + +### Step 3: Report Operation Status + +**Present results to user in a clear, actionable format:** + +**On Success (start):** + +```markdown +## ✓ Virtual Machine Started Successfully + +**VM Details:** +- **Name**: `web-server` +- **Namespace**: `vms` +- **Action**: start +- **Previous Status**: Stopped +- **Current Status**: Running +- **RunStrategy**: Always (VM will auto-restart on crash) + +**Next Steps:** + +To check VM status: +``` +"Show status of VM web-server" +``` + +To view VM details: +``` +"Get details of web-server VM" +``` + +To access the VM: +- Use virtctl console or SSH (depending on VM configuration) + +**Note**: The VM is now consuming cluster resources (CPU, memory). 
+``` + +**On Success (stop):** + +```markdown +## ✓ Virtual Machine Stopped Successfully + +**VM Details:** +- **Name**: `database-vm` +- **Namespace**: `vms` +- **Action**: stop +- **Previous Status**: Running +- **Current Status**: Stopped +- **RunStrategy**: Halted (VM will remain off until started) + +**Impact:** +- VM is no longer consuming cluster resources +- All running processes have been terminated +- VM state preserved (can be started again) + +**Next Steps:** + +To start the VM again: +``` +"Start VM database-vm in namespace vms" +``` + +**Note**: The VM will remain stopped until explicitly started. +``` + +**On Success (restart):** + +```markdown +## ✓ Virtual Machine Restarted Successfully + +**VM Details:** +- **Name**: `test-vm` +- **Namespace**: `vms` +- **Action**: restart +- **Status**: Running (after stop + start) +- **RunStrategy**: Always + +**What happened:** +1. VM was gracefully stopped +2. VM was started again +3. Applications inside the VM are now restarting + +**Expected behavior:** +- Brief service interruption occurred (~1-2 minutes) +- VM is now running with fresh state +- Applications may take additional time to fully initialize + +**Note**: Monitor application logs to confirm services are healthy after restart. +``` + +**On Failure:** + +```markdown +## ❌ Failed to Execute Lifecycle Operation + +**Error**: <error-message> + +**Common Causes:** +- **VM not found** - Check VM name and namespace are correct +- **Insufficient RBAC permissions** - ServiceAccount lacks permission to update VirtualMachines +- **VM already in desired state** - VM is already running/stopped +- **VM in transition** - VM is currently starting/stopping, wait for completion +- **Resource constraints** (for start) - Insufficient cluster resources to start VM + +**Troubleshooting:** +1. Verify VM exists: `oc get vm <vm-name> -n <namespace>` +2. Check permissions: `oc auth can-i update virtualmachines -n <namespace>` +3. View VM status: Use /vm-inventory skill to check current state +4. 
Check cluster capacity: `oc describe nodes` (for start operations) + +Would you like help troubleshooting this error? +``` + +## Common Issues + +### Issue 1: VM Not Found + +**Error**: "VirtualMachine 'xyz' not found in namespace 'abc'" + +**Solution:** +1. Verify VM name spelling is correct +2. Check namespace is correct +3. List VMs in namespace: Use `/vm-inventory` skill +4. VM may have been deleted + +### Issue 2: VM Already in Desired State + +**Warning**: "VM is already running" (when attempting start) + +**Solution:** +- This is not an error - VM is already in the desired state +- No action needed +- If you intended to restart, use `action="restart"` instead +- Inform user VM is already running + +### Issue 3: Permission Denied + +**Error**: "Forbidden: User cannot update VirtualMachines in namespace 'xyz'" + +**Solution:** +- Verify KUBECONFIG has appropriate RBAC permissions +- Required permissions: update VirtualMachine resources +- Contact cluster admin for permission grant +- Check ServiceAccount role bindings + +### Issue 4: VM Stuck in Transitioning State + +**Error**: "VM is stuck in 'Terminating' or 'Starting' state" + +**Solution:** +1. Wait 30-60 seconds for state transition to complete +2. Check VM events for errors: `oc describe vm <vm-name> -n <namespace>` +3. Use `/vm-troubleshooter` skill (if available) to diagnose +4. Check virt-launcher pod status +5. May require cluster admin intervention + +### Issue 5: Insufficient Resources (Start Operation) + +**Error**: "Insufficient CPU/memory to start VM" + +**Solution:** +1. Check cluster resource availability +2. Stop other VMs first to free resources +3. Consider scaling cluster nodes +4. 
Resize VM to smaller instance type (using `/vm-creator` to recreate) + +## Understanding RunStrategy + +When you execute lifecycle actions, the VM's `runStrategy` changes: + +| Action | RunStrategy After | Behavior | +|--------|------------------|----------| +| start | Always | VM runs and auto-restarts on crash | +| stop | Halted | VM stops and stays stopped | +| restart | Always | VM stops, then starts, then auto-restarts on crash | + +**RunStrategy Values:** +- `Always` - VM should always be running (restarts automatically on crash) +- `Halted` - VM should be stopped (stays off) +- `Manual` - VM controlled manually (not modified by this skill) +- `RerunOnFailure` - Restart only on failure (not used by this skill) + +**Note**: This skill sets `Always` for start/restart and `Halted` for stop. + +## Advanced Usage + +### Batch Operations + +Execute lifecycle operations on multiple VMs: + +``` +User: "Stop VMs web-01, web-02, web-03 in namespace production" + +For each VM: +1. Gather parameters (namespace, name, action=stop) +2. Confirm with user (show all VMs to be stopped) +3. Execute vm_lifecycle for each VM sequentially +4. Report results for each +``` + +**Confirmation format for batch:** +```markdown +## Batch Lifecycle Operation + +Stopping 3 VMs in namespace 'production': +- web-01 +- web-02 +- web-03 + +**Impact**: All 3 VMs will shut down, services will be interrupted. + +Proceed? (yes/no) +``` + +### Conditional Operations + +Start VM only if currently stopped: + +``` +User: "Start web-server if it's not running" + +1. Check current status (using vm-inventory or describe) +2. If stopped: proceed with start operation +3. If running: inform user "VM is already running" +4. 
If unknown: attempt start (will fail gracefully if already running) +``` + +## Dependencies + +### Required MCP Servers +- `openshift-virtualization` - OpenShift MCP server with KubeVirt toolset + +### Required MCP Tools +- `vm_lifecycle` (from openshift-virtualization) - Manage VM power state (start/stop/restart) + +### Related Skills +- `vm-creator` - Create VMs before managing their lifecycle +- `vm-inventory` - Check current VM status before lifecycle operations +- `vm-troubleshooter` (planned) - Diagnose VM startup/shutdown issues + +### Reference Documentation +- [OpenShift Virtualization Documentation](https://docs.openshift.com/container-platform/latest/virt/about_virt/about-virt.html) +- [KubeVirt VirtualMachine Lifecycle](https://kubevirt.io/user-guide/virtual_machines/lifecycle/) +- [RunStrategy Documentation](https://kubevirt.io/user-guide/virtual_machines/run_strategies/) +- [OpenShift MCP Server](https://github.com/openshift/openshift-mcp-server) + +## Security Considerations + +- VM lifecycle changes require RBAC permissions (update VirtualMachine resources) +- Stop operations gracefully shut down VMs (no data loss) +- Start operations consume cluster resources (quota limits apply) +- All operations audited in Kubernetes API logs +- KUBECONFIG credentials never exposed in output +- ServiceAccount permissions limit which VMs can be controlled + +## Example Usage + +**Example 1: Start a stopped VM** + +``` +User: "Start VM web-server in namespace vms" + +Agent: [Invokes /vm-lifecycle-manager skill] + +## VM Lifecycle Operation + +Please review and confirm the lifecycle operation: + +| Parameter | Value | Impact | +|-----------|-------|--------| +| VM Name | `web-server` | from user input | +| Namespace | `vms` | from user input | +| Action | `start` | VM will power on and consume cluster resources | + +This operation will change the VM power state. + +Confirm this action or tell me what to change. 
+ +User: "yes" + +Agent: [Calls vm_lifecycle with namespace="vms", name="web-server", action="start"] + +## ✓ Virtual Machine Started Successfully + +VM Details: +- Name: `web-server` +- Namespace: `vms` +- Action: start +- Current Status: Running +- RunStrategy: Always + +The VM is now running and consuming cluster resources. +``` + +**Example 2: Stop a running VM with user reconsidering** + +``` +User: "Stop the database VM in production namespace" + +Agent: [Invokes /vm-lifecycle-manager skill] + +## VM Lifecycle Operation + +| Parameter | Value | Impact | +|-----------|-------|--------| +| VM Name | `database` | from user input | +| Namespace | `production` | from user input | +| Action | `stop` | VM will shut down gracefully, running processes will terminate | + +**Impact**: VM will shut down, database services will stop, causing service interruption. + +Confirm this action or tell me what to change. + +User: "wait, no, don't stop it" + +Agent: Operation cancelled. VM 'database' in namespace 'production' remains running. +``` + +**Example 3: Restart a VM** + +``` +User: "Restart test-vm in namespace dev" + +Agent: [Invokes /vm-lifecycle-manager skill] + +## VM Lifecycle Operation + +| Parameter | Value | Impact | +|-----------|-------|--------| +| VM Name | `test-vm` | from user input | +| Namespace | `dev` | from user input | +| Action | `restart` | VM will stop and start again, causing brief service interruption | + +**Impact**: Brief service interruption (~1-2 minutes) while VM restarts. + +Confirm this action? + +User: "yes" + +Agent: [Calls vm_lifecycle with namespace="dev", name="test-vm", action="restart"] + +## ✓ Virtual Machine Restarted Successfully + +VM Details: +- Name: `test-vm` +- Namespace: `dev` +- Action: restart +- Status: Running (after stop + start) + +Brief service interruption occurred. Monitor application logs to confirm services are healthy. 
+``` + +**Example 4: VM already in desired state** + +``` +User: "Start web-server in namespace vms" + +Agent: [Invokes /vm-lifecycle-manager skill] + [Calls vm_lifecycle with action="start"] + +## ℹ️ VM Already Running + +**VM Details:** +- **Name**: `web-server` +- **Namespace**: `vms` +- **Current Status**: Running + +**Result**: No action taken - VM is already in the desired state. + +If you intended to restart the VM, use: +``` +"Restart VM web-server in namespace vms" +``` +``` From 0f65cfae574088cc1c7439d46626bf2aaa47ff85 Mon Sep 17 00:00:00 2001 From: r2dedios Date: Mon, 9 Feb 2026 16:14:20 +0100 Subject: [PATCH 2/3] doc(rh-virt): Included copilot feedback and improved skills definition; chore(rh-virt): replaced kubernetes-mcp-server by openshift-mcp-server Signed-off-by: r2dedios --- rh-virt/.mcp.json | 21 +- rh-virt/README.md | 166 ++++++++-- rh-virt/skills/vm-creator/SKILL.md | 40 ++- rh-virt/skills/vm-inventory/SKILL.md | 325 ++++++++++++++++--- rh-virt/skills/vm-lifecycle-manager/SKILL.md | 33 +- 5 files changed, 492 insertions(+), 93 deletions(-) diff --git a/rh-virt/.mcp.json b/rh-virt/.mcp.json index 7fd261b..80cf04a 100644 --- a/rh-virt/.mcp.json +++ b/rh-virt/.mcp.json @@ -1,10 +1,27 @@ { "mcpServers": { "openshift-virtualization": { - "command": "npx", - "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "command": "podman", + "args": [ + "run", + "--rm", + "-i", + "--network=host", + "--userns=keep-id:uid=65532,gid=65532", + "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", + "--entrypoint", "/app/kubernetes-mcp-server", + "localhost/openshift-mcp-server:latest", + "--kubeconfig", "/kubeconfig", + "--toolsets", "core,kubevirt" + ], "env": { "KUBECONFIG": "${KUBECONFIG}" + }, + "description": "Red Hat Openshift MCP server for interacting with Openshift Container Platform clusters and its operators", + "security": { + "isolation": "container", + "network": "local", + "credentials": "env-only" } } } diff --git a/rh-virt/README.md 
b/rh-virt/README.md index 4bef002..0d8dc99 100644 --- a/rh-virt/README.md +++ b/rh-virt/README.md @@ -1,4 +1,4 @@ -# Red Hat Virtualization Agentic Pack +# Red Hat OpenShift Virtualization (KubeVirt) Agentic Pack OpenShift Virtualization management tools for administering virtual machines on OpenShift clusters. This pack provides automation capabilities for VM lifecycle management, provisioning, and inventory operations using KubeVirt. @@ -38,6 +38,45 @@ oc get virtualmachines -A kubectl get vms -A ``` +### Building the MCP Server Container Image + +The OpenShift MCP server is not published to public registries, so you need to build it locally before using this plugin. + +**Prerequisites**: +- Git +- Podman (or Docker) + +**Build Steps**: + +1. Clone the openshift-mcp-server repository: + ```bash + git clone https://github.com/openshift/openshift-mcp-server.git + cd openshift-mcp-server + ``` + +2. Build the container image using Podman: + ```bash + podman build -t localhost/openshift-mcp-server:latest -f Dockerfile . + ``` + + Or using Docker: + ```bash + docker build -t localhost/openshift-mcp-server:latest -f Dockerfile . + ``` + +3. Verify the image was built successfully: + ```bash + podman images localhost/openshift-mcp-server:latest + ``` + + Expected output: + ``` + REPOSITORY TAG IMAGE ID CREATED SIZE + localhost/openshift-mcp-server latest <image-id> <created> ~192 MB + ``` + +**Note**: The build process takes several minutes as it compiles the Go binary and downloads dependencies. The final image size is approximately 192 MB. 
+ ### Installation (Claude Code) Install the pack as a Claude Code plugin: @@ -67,13 +106,17 @@ Create new virtual machines in OpenShift Virtualization with automatic error dia - "Deploy a virtual machine" - "Provision a VM with specific configuration" +**MCP Tools Used:** +- `vm_create` (kubevirt toolset) - Creates VirtualMachine resources with instance type resolution + **What it does:** -- Creates VirtualMachine resources -- Configures instance specifications -- Sets up storage and networking +- Creates VirtualMachine resources with intelligent defaults +- Automatically resolves instance types based on size hints (small, medium, large, xlarge …) +- Configures storage, networking, and OS workloads - **Automatically diagnoses scheduling issues** (e.g., node taints, resource constraints) -- **Proposes workarounds** for MCP tool limitations +- **Proposes workarounds** for common errors - **Applies fixes** with user confirmation (human-in-the-loop) +- Requires explicit user approval before creating VMs (resource consumption) ### 2. **vm-lifecycle-manager** - VM Power Management @@ -83,16 +126,21 @@ Control VM lifecycle operations including start, stop, and restart. - "Start VM [name]" - "Stop the virtual machine [name]" - "Restart VM [name]" +- "Power on/off VM [name]" + +**MCP Tools Used:** +- `vm_lifecycle` (kubevirt toolset) - Manages VM power state transitions **What it does:** -- Starts stopped/halted VMs -- Stops running VMs gracefully +- Starts stopped/halted VMs (changes runStrategy to Always) +- Stops running VMs gracefully (changes runStrategy to Halted) - Restarts VMs (stop + start sequence) -- Manages VM runStrategy transitions +- Manages VM runStrategy transitions safely +- Requires explicit user confirmation for each operation (prevents accidental service disruption) ### 3. **vm-inventory** - VM Discovery and Status -List and inspect virtual machines across namespaces. 
+List and inspect virtual machines across namespaces with comprehensive status information. **Use when:** - "List all VMs" @@ -100,41 +148,91 @@ List and inspect virtual machines across namespaces. - "Get details of VM [name]" - "What VMs are running?" +**MCP Tools Used:** +- `resources_list` (core toolset) - Lists VirtualMachine resources across namespaces +- `resources_get` (core toolset) - Retrieves detailed VM specifications and status + **What it does:** -- Lists VMs across namespaces -- Shows VM status and health -- Provides detailed VM configuration -- Filters VMs by labels or fields +- Lists VMs across all namespaces or specific namespace +- Shows VM status (Running, Stopped, Provisioning, Error) and readiness +- Provides detailed VM configuration (vCPU, memory, storage, networks) +- Filters VMs by labels or field selectors +- Displays resource usage, node placement, and health conditions +- Read-only operations with fallback to `oc` CLI if MCP tools unavailable ## MCP Server Integration -The pack integrates with the OpenShift MCP server (configured in `.mcp.json`): +The pack integrates with the OpenShift MCP server (configured in `.mcp.json`), which provides two toolsets for comprehensive cluster and virtualization management: -### **openshift-virtualization** - OpenShift MCP Server (KubeVirt Toolset) +### **openshift-virtualization** - OpenShift MCP Server -Provides access to KubeVirt virtual machine operations through the Model Context Protocol. +Provides access to both Kubernetes core operations and KubeVirt virtual machine management through the Model Context Protocol. 
**Repository**: https://github.com/openshift/openshift-mcp-server -**Available Tools**: -- `vm_create` - Create new VirtualMachines +**Enabled Toolsets**: `core` and `kubevirt` (via `--toolsets core,kubevirt`) + +**Available Toolsets**: + +The server provides two toolsets enabled via `--toolsets core,kubevirt`: + +**KubeVirt Toolset** (`kubevirt`): +- `vm_create` - Create new VirtualMachines with instance type resolution and OS selection - `vm_lifecycle` - Manage VM power state (start/stop/restart) +**Core Toolset** (`core`): +- `resources_list` - List Kubernetes resources (VMs, Pods, Deployments, etc.) +- `resources_get` - Get detailed resource information +- `resources_create_or_update` - Create or update Kubernetes resources +- `resources_delete` - Delete Kubernetes resources +- `resources_scale` - Scale deployments and statefulsets +- `pods_list`, `pods_list_in_namespace` - List pods across namespaces or in specific namespace +- `pods_get`, `pods_log`, `pods_exec`, `pods_delete`, `pods_run` - Pod operations +- `pods_top` - Resource consumption metrics for pods +- `nodes_top`, `nodes_log`, `nodes_stats_summary` - Node operations and metrics +- `events_list` - List cluster events for debugging +- `namespaces_list`, `projects_list` - Namespace and project discovery + **Configuration**: ```json { "mcpServers": { "openshift-virtualization": { - "command": "npx", - "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "command": "podman", + "args": [ + "run", + "--rm", + "-i", + "--network=host", + "--userns=keep-id:uid=65532,gid=65532", + "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", + "--entrypoint", "/app/kubernetes-mcp-server", + "localhost/openshift-mcp-server:latest", + "--kubeconfig", "/kubeconfig", + "--toolsets", "core,kubevirt" + ], "env": { "KUBECONFIG": "${KUBECONFIG}" + }, + "description": "Red Hat Openshift MCP server for interacting with Openshift Container Platform clusters and its operators", + "security": { + "isolation": 
"container", + "network": "local", + "credentials": "env-only" } } } } ``` +**Configuration Details**: +- `--userns=keep-id:uid=65532,gid=65532` - Maps container user namespace for rootless Podman security +- `,Z` flag on volume mount - Applies SELinux context label for container access to kubeconfig +- `--entrypoint /app/kubernetes-mcp-server` - Specifies the MCP server binary to execute +- `--kubeconfig /kubeconfig` - Path to kubeconfig inside the container +- `--toolsets core,kubevirt` - Enables both core Kubernetes and KubeVirt-specific tool collections +- `--network=host` - Required for accessing local/remote Kubernetes clusters + ## Sample Workflows ### Workflow 1: Create and Start VM @@ -210,10 +308,27 @@ MCP server is configured in `.mcp.json`: { "mcpServers": { "openshift-virtualization": { - "command": "npx", - "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "command": "podman", + "args": [ + "run", + "--rm", + "-i", + "--network=host", + "--userns=keep-id:uid=65532,gid=65532", + "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", + "--entrypoint", "/app/kubernetes-mcp-server", + "localhost/openshift-mcp-server:latest", + "--kubeconfig", "/kubeconfig", + "--toolsets", "core,kubevirt" + ], "env": { "KUBECONFIG": "${KUBECONFIG}" + }, + "description": "Red Hat Openshift MCP server for interacting with Openshift Container Platform clusters and its operators", + "security": { + "isolation": "container", + "network": "local", + "credentials": "env-only" } } } @@ -221,7 +336,12 @@ MCP server is configured in `.mcp.json`: ``` **Key Configuration Notes**: -- Uses `KUBECONFIG` environment variable for cluster authentication +- Uses Podman to run locally-built container image `localhost/openshift-mcp-server:latest` +- `--userns=keep-id:uid=65532,gid=65532` - Enables rootless container security with user namespace mapping +- Mounts `KUBECONFIG` as read-only volume inside container with `,Z` for SELinux labeling +- `--entrypoint 
/app/kubernetes-mcp-server` - Specifies the MCP server binary +- `--toolsets core,kubevirt` - Enables both core Kubernetes and KubeVirt-specific tools +- Uses `--network=host` for cluster access (required for local/remote clusters) - Requires OpenShift Virtualization operator installed on the cluster - ServiceAccount needs RBAC permissions for VirtualMachine resources diff --git a/rh-virt/skills/vm-creator/SKILL.md b/rh-virt/skills/vm-creator/SKILL.md index 08a3a83..3e96412 100644 --- a/rh-virt/skills/vm-creator/SKILL.md +++ b/rh-virt/skills/vm-creator/SKILL.md @@ -21,11 +21,11 @@ Create virtual machines in OpenShift Virtualization using the `vm_create` tool f **IMPORTANT:** This skill requires explicit user confirmation before creating VMs. You MUST: -1. **Wait for user confirmation** on all VM configuration parameters before executing `vm_create` -2. **Do NOT proceed** with VM creation until the user explicitly approves the configuration -3. **Present configuration clearly** in a table format and wait for user response -4. **Never auto-execute** VM creation without user approval - creating VMs is a destructive operation that consumes cluster resources -5. **Allow modifications** - If user wants to change parameters, update and re-confirm before proceeding +1. **Wait for user confirmation** on all VM configuration parameters before executing `vm_create`. +2. **Do NOT proceed** with VM creation until the user explicitly approves the configuration. +3. **Present configuration clearly** in a table format and wait for user response. +4. **Never auto-execute** VM creation without user approval - creating VMs is an additive operation that consumes cluster resources, affects quotas and may incur cost. +5. **Allow modifications** - If user wants to change parameters, update and re-confirm before proceeding. If the user says "no" or wants modifications, address their concerns before proceeding. 
@@ -75,21 +75,39 @@ When prerequisites fail: ❌ Cannot execute vm-creator: MCP server 'openshift-virtualization' is not available 📋 Setup Instructions: -1. Add openshift-virtualization to .mcp.json: +1. Build the OpenShift MCP server container image locally: + git clone https://github.com/openshift/openshift-mcp-server.git + cd openshift-mcp-server + podman build -t localhost/openshift-mcp-server:latest -f Dockerfile . + +2. Add openshift-virtualization to .mcp.json: { "mcpServers": { "openshift-virtualization": { - "command": "npx", - "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "command": "podman", + "args": [ + "run", + "--rm", + "-i", + "--network=host", + "--userns=keep-id:uid=65532,gid=65532", + "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", + "--entrypoint", "/app/kubernetes-mcp-server", + "localhost/openshift-mcp-server:latest", + "--kubeconfig", "/kubeconfig", + "--toolsets", "core,kubevirt" + ], "env": { "KUBECONFIG": "${KUBECONFIG}" } } } } -2. Set KUBECONFIG environment variable: + +3. Set KUBECONFIG environment variable: export KUBECONFIG="/path/to/your/kubeconfig" -3. Restart Claude Code to reload MCP servers + +4. Restart Claude Code to reload MCP servers 🔗 Documentation: https://github.com/openshift/openshift-mcp-server @@ -196,7 +214,7 @@ Call `vm_create` with the confirmed parameters from Step 1. "namespace": "", // REQUIRED "name": "", // REQUIRED "workload": "", // OPTIONAL (default: "fedora") - "size": "", // OPTIONAL + "size": "",// OPTIONAL "storage": "", // OPTIONAL (default: "30Gi") "autostart": // OPTIONAL (default: false) } diff --git a/rh-virt/skills/vm-inventory/SKILL.md b/rh-virt/skills/vm-inventory/SKILL.md index 46ab0c2..009024c 100644 --- a/rh-virt/skills/vm-inventory/SKILL.md +++ b/rh-virt/skills/vm-inventory/SKILL.md @@ -22,8 +22,13 @@ List and inspect virtual machines in OpenShift Virtualization clusters. 
This ski **Required MCP Server**: `openshift-virtualization` ([OpenShift MCP Server](https://github.com/openshift/openshift-mcp-server)) **Required MCP Tools**: -- Kubernetes API access for VirtualMachine resources (via MCP server) -- Standard Kubernetes resource listing and retrieval capabilities +- `resources_list` (from openshift-virtualization) - List Kubernetes resources including VirtualMachines +- `resources_get` (from openshift-virtualization) - Get specific Kubernetes resource details + +**Fallback CLI Commands** (if MCP tools are unavailable): +- `oc get virtualmachines` - List VirtualMachines using OpenShift CLI +- `oc get vm` - Shorthand for listing VirtualMachines +- `oc get vm -n -o yaml` - Get VM details in YAML format **Required Environment Variables**: - `KUBECONFIG` - Path to Kubernetes configuration file with cluster access @@ -57,27 +62,46 @@ When prerequisites fail: ❌ Cannot execute vm-inventory: MCP server 'openshift-virtualization' is not available 📋 Setup Instructions: -1. Add openshift-virtualization to .mcp.json: +1. Build the OpenShift MCP server container image locally: + git clone https://github.com/openshift/openshift-mcp-server.git + cd openshift-mcp-server + podman build -t localhost/openshift-mcp-server:latest -f Dockerfile . + +2. Add openshift-virtualization to .mcp.json: { "mcpServers": { "openshift-virtualization": { - "command": "npx", - "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "command": "podman", + "args": [ + "run", + "--rm", + "-i", + "--network=host", + "--userns=keep-id:uid=65532,gid=65532", + "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", + "--entrypoint", "/app/kubernetes-mcp-server", + "localhost/openshift-mcp-server:latest", + "--kubeconfig", "/kubeconfig", + "--toolsets", "core,kubevirt" + ], "env": { "KUBECONFIG": "${KUBECONFIG}" } } } } -2. Set KUBECONFIG environment variable: + +3. Set KUBECONFIG environment variable: export KUBECONFIG="/path/to/your/kubeconfig" -3. 
Restart Claude Code to reload MCP servers + +4. Restart Claude Code to reload MCP servers 🔗 Documentation: https://github.com/openshift/openshift-mcp-server ❓ How would you like to proceed? Options: - "setup" - Help configure the MCP server now +- "cli" - Use OpenShift CLI commands as fallback (requires KUBECONFIG) - "skip" - Skip this skill - "abort" - Stop workflow @@ -86,6 +110,11 @@ Please respond with your choice. ⚠️ **SECURITY**: Never display actual KUBECONFIG path or credential values in output. +**Note on Fallback Behavior**: +- If MCP server is unavailable but KUBECONFIG is set, the skill CAN proceed with CLI commands +- Always offer the user the choice between setup (MCP) or CLI fallback +- CLI fallback requires explicit user confirmation before executing any commands + ## When to Use This Skill **Trigger this skill when:** @@ -110,23 +139,70 @@ Please respond with your choice. ## Workflow +**CRITICAL EXECUTION PATTERN**: +1. **ALWAYS attempt MCP server tools FIRST** - Try `resources_list` or `resources_get` from the openshift-virtualization MCP server +2. **If MCP tools fail or are unavailable** - Propose to user to use CLI commands (`oc get vm`, `oc get virtualmachines`) +3. 
**Never skip MCP attempt** - Even if you suspect they might not be available, always try them first + +**Tool Execution Priority**: +- **Primary**: MCP tools (`resources_list`, `resources_get`) from openshift-virtualization server +- **Fallback**: CLI commands (`oc`) - Only after MCP tools fail and with user confirmation + +**MCP Tool Reference**: +- Tool source: https://github.com/openshift/openshift-mcp-server/blob/main/pkg/toolsets/core/resources.go +- Tool names: `resources_list`, `resources_get` +- These tools provide direct access to Kubernetes resources through the MCP protocol + ### Workflow A: List All VMs (Across All Namespaces) -**Step 1: Query VirtualMachine Resources** +**Step 1: Query VirtualMachine Resources Using MCP Tool** -Use Kubernetes API to list all VirtualMachine resources across namespaces. +**PRIMARY: MCP Tool**: `resources_list` (from openshift-virtualization) -**Implementation approach:** -``` -oc get virtualmachines -A -# or -kubectl get vms -A +**Parameters**: +```json +{ + "apiVersion": "kubevirt.io/v1", + "kind": "VirtualMachine", + "allNamespaces": true +} ``` -**Via MCP**: Use standard Kubernetes resource listing with: -- `apiVersion`: `kubevirt.io/v1` -- `kind`: `VirtualMachine` -- No namespace specified (all namespaces) +**Expected Output**: List of VirtualMachine resources with: +- Name +- Namespace +- Status (Running/Stopped/Pending/Error) +- Resource specifications (vCPU, memory) +- Age +- Node assignment (for running VMs) + +**Error Handling**: +If the MCP tool call fails (tool not found, connection error, etc.): + +1. **Report to user**: + ``` + ⚠️ MCP tool 'resources_list' is not available or failed to execute. + + 📋 I can use the OpenShift CLI instead to list VMs. + + Would you like me to proceed with: `oc get virtualmachines -A`? + + (Respond "yes" to proceed with CLI, or "setup" to configure MCP server) + ``` + +2. **Wait for user confirmation** + +3. 
**If user approves**, execute CLI fallback: + ```bash + oc get virtualmachines -A -o json + ``` + +**CLI Fallback Command**: +```bash +oc get virtualmachines --all-namespaces -o json +# or shorthand: +oc get vm -A -o json +``` **Step 2: Format and Display Results** @@ -163,15 +239,46 @@ Present VMs organized by namespace with key information: Ask user for namespace if not provided in the request. -**Step 2: Query VMs in Namespace** +**Step 2: Query VMs in Namespace Using MCP Tool** -Use Kubernetes API to list VirtualMachines in the specified namespace. +**PRIMARY: MCP Tool**: `resources_list` (from openshift-virtualization) -**Implementation:** +**Parameters**: +```json +{ + "apiVersion": "kubevirt.io/v1", + "kind": "VirtualMachine", + "namespace": "" // REQUIRED - user-provided namespace +} ``` -oc get virtualmachines -n -# or -kubectl get vms -n + +**Expected Output**: List of VirtualMachine resources in the specified namespace with status and configuration details + +**Error Handling**: +If the MCP tool call fails: + +1. **Report to user**: + ``` + ⚠️ MCP tool 'resources_list' failed. + + 📋 Fallback option: Use OpenShift CLI command: + `oc get virtualmachines -n ` + + Would you like me to proceed with the CLI command? + ``` + +2. **Wait for user confirmation** + +3. **If approved**, execute CLI fallback: + ```bash + oc get virtualmachines -n -o json + ``` + +**CLI Fallback Command**: +```bash +oc get virtualmachines -n -o json +# or shorthand: +oc get vm -n -o json ``` **Step 3: Display Namespace-Specific Results** @@ -196,15 +303,50 @@ Required: - VM name - Namespace (ask if not provided) -**Step 2: Retrieve VM Resource Details** +**Step 2: Retrieve VM Resource Details Using MCP Tool** -Use Kubernetes API to get the specific VirtualMachine resource. 
+**PRIMARY: MCP Tool**: `resources_get` (from openshift-virtualization) -**Implementation:** +**Parameters**: +```json +{ + "apiVersion": "kubevirt.io/v1", + "kind": "VirtualMachine", + "namespace": "", // REQUIRED - user-provided or prompted + "name": "" // REQUIRED - user-provided +} ``` + +**Expected Output**: Complete VirtualMachine resource specification including: +- Metadata (name, namespace, labels, annotations, creation timestamp) +- Spec (instance type, workload, run strategy, resource requirements, volumes, networks) +- Status (conditions, phase, ready state, node assignment, pod IP, guest agent info) + +**Error Handling**: +If the MCP tool call fails: + +1. **Report to user**: + ``` + ⚠️ MCP tool 'resources_get' failed. + + 📋 Fallback option: Use OpenShift CLI command: + `oc get vm -n -o yaml` + + Would you like me to proceed with the CLI command? + ``` + +2. **Wait for user confirmation** + +3. **If approved**, execute CLI fallback: + ```bash + oc get virtualmachine -n -o yaml + ``` + +**CLI Fallback Command**: +```bash oc get virtualmachine -n -o yaml -# or -kubectl get vm -n -o yaml +# or shorthand: +oc get vm -n -o yaml ``` **Step 3: Display Detailed Information** @@ -256,23 +398,66 @@ kubectl get vm -n -o yaml ### Workflow D: Filter VMs by Criteria -**Filtering options:** +**Step 1: Query VMs with Filters Using MCP Tool** + +**PRIMARY: MCP Tool**: `resources_list` (from openshift-virtualization) -1. **By Status**: - - Running VMs only - - Stopped VMs only - - VMs in error state +**Parameters** (with label selector): +```json +{ + "apiVersion": "kubevirt.io/v1", + "kind": "VirtualMachine", + "allNamespaces": true, // or specify "namespace": "" + "labelSelector": "app=web" // OPTIONAL - filter by labels (e.g., "app=web", "env=production") +} +``` -2. **By Labels**: +**Error Handling**: +If the MCP tool call fails: + +1. **Report to user**: ``` - User: "Show me all VMs with label app=web" + ⚠️ MCP tool 'resources_list' failed. 
+ + 📋 Fallback: Use OpenShift CLI with label selector: + `oc get virtualmachines -A -l app=web` - Filter: -l app=web + Would you like me to proceed with the CLI command? ``` -3. **By Resource Size**: - - Large VMs (> 8 vCPU) - - Small VMs (< 4 vCPU) +2. **Wait for user confirmation** + +3. **If approved**, execute CLI fallback: + ```bash + oc get virtualmachines -A -l -o json + ``` + +**CLI Fallback Command**: +```bash +# With label selector +oc get virtualmachines --all-namespaces -l -o json +# Example: +oc get vm -A -l app=web -o json +``` + +**Filtering options:** + +1. **By Labels** (via labelSelector parameter): + - `"app=web"` - Single label match + - `"app=web,env=production"` - Multiple labels (AND logic) + - `"tier in (frontend,backend)"` - Set-based selector + +2. **By Status** (post-processing after retrieval): + - Filter returned results by status field + - Running VMs: `status.printableStatus == "Running"` + - Stopped VMs: `status.printableStatus == "Stopped"` + - VMs in error state: Check status.conditions for errors + +3. **By Resource Size** (post-processing after retrieval): + - Parse instance type or resource specs from returned VMs + - Filter based on vCPU/memory requirements + +**Step 2: Display Filtered Results** **Display filtered results with explanation:** ```markdown @@ -483,11 +668,24 @@ No VMs were found in this namespace. 
## Dependencies ### Required MCP Servers -- `openshift-virtualization` - OpenShift MCP server with KubeVirt toolset +- `openshift-virtualization` - OpenShift MCP server (https://github.com/openshift/openshift-mcp-server) + +### Required MCP Tools (PRIMARY - Always try these first) +- `resources_list` (from openshift-virtualization) - List Kubernetes resources including VirtualMachines + - Parameters: apiVersion, kind, namespace (optional), allNamespaces (optional), labelSelector (optional) + - Source: https://github.com/openshift/openshift-mcp-server/blob/main/pkg/toolsets/core/resources.go +- `resources_get` (from openshift-virtualization) - Get specific Kubernetes resource details + - Parameters: apiVersion, kind, namespace, name + - Source: https://github.com/openshift/openshift-mcp-server/blob/main/pkg/toolsets/core/resources.go + +### CLI Fallback Commands (Use only if MCP tools fail) +- `oc get virtualmachines` or `oc get vm` - List VirtualMachines +- `oc get vm <vm-name> -n <namespace>` - Get specific VM +- `oc get vm -A` - List VMs across all namespaces +- `oc get vm -n <namespace>` - List VMs in specific namespace +- `oc get vm -l <label-selector>` - Filter VMs by label selector -### Required MCP Tools -- Kubernetes API access for VirtualMachine resources -- Standard resource listing and retrieval capabilities +**Important**: Always attempt MCP tools first. Only use CLI commands after MCP tool failure and with user confirmation. ### Related Skills - `vm-creator` - Create VMs after checking inventory @@ -511,13 +709,14 @@ No VMs were found in this namespace. 
## Example Usage -**Example 1: List all VMs** +**Example 1: List all VMs (using MCP tool)** ``` User: "List all VMs" Agent: [Invokes /vm-inventory skill] - [Queries all VirtualMachine resources across namespaces] + [Attempts MCP tool: resources_list with apiVersion="kubevirt.io/v1", kind="VirtualMachine", allNamespaces=true] + [Tool succeeds] ## 📋 Virtual Machines (All Namespaces) @@ -532,13 +731,39 @@ Namespace: development Summary: 4 VMs (3 running, 1 stopped) ``` +**Example 1b: List all VMs (MCP tool unavailable, CLI fallback)** + +``` +User: "List all VMs" + +Agent: [Invokes /vm-inventory skill] + [Attempts MCP tool: resources_list] + [Tool fails - not available] + +⚠️ MCP tool 'resources_list' is not available. + +📋 Fallback option: Use OpenShift CLI command: +`oc get virtualmachines -A` + +Would you like me to proceed with the CLI command? + +User: "yes" + +Agent: [Executes: oc get virtualmachines -A -o json] + +## 📋 Virtual Machines (All Namespaces) + +[Same output as above] +``` + **Example 2: Get details of specific VM** ``` User: "Show me details of web-server-01 in production" Agent: [Invokes /vm-inventory skill] - [Retrieves specific VirtualMachine resource] + [Attempts MCP tool: resources_get with apiVersion="kubevirt.io/v1", kind="VirtualMachine", namespace="production", name="web-server-01"] + [Tool succeeds] ## 🖥️ Virtual Machine Details @@ -566,7 +791,8 @@ Uptime: 12 days User: "What VMs are in the development namespace?" 
Agent: [Invokes /vm-inventory skill] - [Queries VMs in development namespace] + [Attempts MCP tool: resources_list with apiVersion="kubevirt.io/v1", kind="VirtualMachine", namespace="development"] + [Tool succeeds] ## 📋 VMs in 'development' namespace @@ -584,7 +810,8 @@ Summary: 2 VMs (1 running, 1 stopped) User: "List VMs in namespace test" Agent: [Invokes /vm-inventory skill] - [Queries VMs in test namespace] + [Attempts MCP tool: resources_list with apiVersion="kubevirt.io/v1", kind="VirtualMachine", namespace="test"] + [Returns empty list] ## No Virtual Machines Found diff --git a/rh-virt/skills/vm-lifecycle-manager/SKILL.md b/rh-virt/skills/vm-lifecycle-manager/SKILL.md index 0e1f756..9eccd61 100644 --- a/rh-virt/skills/vm-lifecycle-manager/SKILL.md +++ b/rh-virt/skills/vm-lifecycle-manager/SKILL.md @@ -75,21 +75,39 @@ When prerequisites fail: ❌ Cannot execute vm-lifecycle-manager: MCP server 'openshift-virtualization' is not available 📋 Setup Instructions: -1. Add openshift-virtualization to .mcp.json: +1. Build the OpenShift MCP server container image locally: + git clone https://github.com/openshift/openshift-mcp-server.git + cd openshift-mcp-server + podman build -t localhost/openshift-mcp-server:latest -f Dockerfile . + +2. Add openshift-virtualization to .mcp.json: { "mcpServers": { "openshift-virtualization": { - "command": "npx", - "args": ["-y", "@openshift/openshift-mcp-server", "--toolset", "kubevirt"], + "command": "podman", + "args": [ + "run", + "--rm", + "-i", + "--network=host", + "--userns=keep-id:uid=65532,gid=65532", + "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", + "--entrypoint", "/app/kubernetes-mcp-server", + "localhost/openshift-mcp-server:latest", + "--kubeconfig", "/kubeconfig", + "--toolsets", "core,kubevirt" + ], "env": { "KUBECONFIG": "${KUBECONFIG}" } } } } -2. Set KUBECONFIG environment variable: + +3. Set KUBECONFIG environment variable: export KUBECONFIG="/path/to/your/kubeconfig" -3. Restart Claude Code to reload MCP servers + +4. 
Restart Claude Code to reload MCP servers 🔗 Documentation: https://github.com/openshift/openshift-mcp-server @@ -389,8 +407,7 @@ When you execute lifecycle actions, the VM's `runStrategy` changes: **RunStrategy Values:** - `Always` - VM should always be running (restarts automatically on crash) - `Halted` - VM should be stopped (stays off) -- `Manual` - VM controlled manually (not modified by this skill) -- `RerunOnFailure` - Restart only on failure (not used by this skill) +- `restart` (lifecycle action, not a runStrategy value) - VM is stopped and then started again, leaving `runStrategy` set to `Always` **Note**: This skill sets `Always` for start/restart and `Halted` for stop. @@ -459,7 +476,7 @@ User: "Start web-server if it's not running" ## Security Considerations - VM lifecycle changes require RBAC permissions (update VirtualMachine resources) -- Stop operations gracefully shut down VMs (no data loss) +- Stop operations attempt a graceful shutdown of VMs; guest OS and application data consistency depends on the workload and is not fully guaranteed - Start operations consume cluster resources (quota limits apply) - All operations audited in Kubernetes API logs - KUBECONFIG credentials never exposed in output From 9a1de825519cb9c864c7c5e0a07c8c76e7816e3d Mon Sep 17 00:00:00 2001 From: r2dedios Date: Mon, 9 Feb 2026 18:09:54 +0100 Subject: [PATCH 3/3] fix(rh-virt): Updated doc and MCP server image Signed-off-by: r2dedios --- rh-virt/.mcp.json | 2 +- rh-virt/README.md | 11 ++++++----- rh-virt/skills/vm-creator/SKILL.md | 13 ++++--------- rh-virt/skills/vm-inventory/SKILL.md | 13 ++++--------- rh-virt/skills/vm-lifecycle-manager/SKILL.md | 13 ++++--------- 5 files changed, 19 insertions(+), 33 deletions(-) diff --git a/rh-virt/.mcp.json b/rh-virt/.mcp.json index 80cf04a..0d54079 100644 --- a/rh-virt/.mcp.json +++ b/rh-virt/.mcp.json @@ -10,7 +10,7 @@ "--userns=keep-id:uid=65532,gid=65532", "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", "--entrypoint", "/app/kubernetes-mcp-server", - "localhost/openshift-mcp-server:latest", + 
"quay.io/ecosystem-appeng/openshift-mcp-server:latest", "--kubeconfig", "/kubeconfig", "--toolsets", "core,kubevirt" ], diff --git a/rh-virt/README.md b/rh-virt/README.md index 0d8dc99..718346f 100644 --- a/rh-virt/README.md +++ b/rh-virt/README.md @@ -67,12 +67,13 @@ The OpenShift MCP server is not published to public registries, so you need to b 3. Verify the image was built successfully: ```bash podman images localhost/openshift-mcp-server:latest + podman tag localhost/openshift-mcp-server:latest quay.io/ecosystem-appeng/openshift-mcp-server:latest ``` Expected output: ``` - REPOSITORY TAG IMAGE ID CREATED SIZE - localhost/openshift-mcp-server latest ~192 MB + REPOSITORY TAG IMAGE ID CREATED SIZE + quay.io/ecosystem-appeng/openshift-mcp-server:latest latest ~192 MB ``` **Note**: The build process takes several minutes as it compiles the Go binary and downloads dependencies. The final image size is approximately 192 MB. @@ -207,7 +208,7 @@ The server provides two toolsets enabled via `--toolsets core,kubevirt`: "--userns=keep-id:uid=65532,gid=65532", "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", "--entrypoint", "/app/kubernetes-mcp-server", - "localhost/openshift-mcp-server:latest", + "quay.io/ecosystem-appeng/openshift-mcp-server:latest", "--kubeconfig", "/kubeconfig", "--toolsets", "core,kubevirt" ], @@ -317,7 +318,7 @@ MCP server is configured in `.mcp.json`: "--userns=keep-id:uid=65532,gid=65532", "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", "--entrypoint", "/app/kubernetes-mcp-server", - "localhost/openshift-mcp-server:latest", + "quay.io/ecosystem-appeng/openshift-mcp-server:latest", "--kubeconfig", "/kubeconfig", "--toolsets", "core,kubevirt" ], @@ -336,7 +337,7 @@ MCP server is configured in `.mcp.json`: ``` **Key Configuration Notes**: -- Uses Podman to run locally-built container image `localhost/openshift-mcp-server:latest` +- Uses Podman to run locally-built container image `quay.io/ecosystem-appeng/openshift-mcp-server:latest` - 
`--userns=keep-id:uid=65532,gid=65532` - Enables rootless container security with user namespace mapping - Mounts `KUBECONFIG` as read-only volume inside container with `,Z` for SELinux labeling - `--entrypoint /app/kubernetes-mcp-server` - Specifies the MCP server binary diff --git a/rh-virt/skills/vm-creator/SKILL.md b/rh-virt/skills/vm-creator/SKILL.md index 3e96412..dc0610a 100644 --- a/rh-virt/skills/vm-creator/SKILL.md +++ b/rh-virt/skills/vm-creator/SKILL.md @@ -75,12 +75,7 @@ When prerequisites fail: ❌ Cannot execute vm-creator: MCP server 'openshift-virtualization' is not available 📋 Setup Instructions: -1. Build the OpenShift MCP server container image locally: - git clone https://github.com/openshift/openshift-mcp-server.git - cd openshift-mcp-server - podman build -t localhost/openshift-mcp-server:latest -f Dockerfile . - -2. Add openshift-virtualization to .mcp.json: +1. Add openshift-virtualization to .mcp.json: { "mcpServers": { "openshift-virtualization": { @@ -93,7 +88,7 @@ When prerequisites fail: "--userns=keep-id:uid=65532,gid=65532", "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", "--entrypoint", "/app/kubernetes-mcp-server", - "localhost/openshift-mcp-server:latest", + "quay.io/ecosystem-appeng/openshift-mcp-server:latest", "--kubeconfig", "/kubeconfig", "--toolsets", "core,kubevirt" ], @@ -104,10 +99,10 @@ When prerequisites fail: } } -3. Set KUBECONFIG environment variable: +2. Set KUBECONFIG environment variable: export KUBECONFIG="/path/to/your/kubeconfig" -4. Restart Claude Code to reload MCP servers +3. 
Restart Claude Code to reload MCP servers 🔗 Documentation: https://github.com/openshift/openshift-mcp-server diff --git a/rh-virt/skills/vm-inventory/SKILL.md b/rh-virt/skills/vm-inventory/SKILL.md index 009024c..7efff7e 100644 --- a/rh-virt/skills/vm-inventory/SKILL.md +++ b/rh-virt/skills/vm-inventory/SKILL.md @@ -62,12 +62,7 @@ When prerequisites fail: ❌ Cannot execute vm-inventory: MCP server 'openshift-virtualization' is not available 📋 Setup Instructions: -1. Build the OpenShift MCP server container image locally: - git clone https://github.com/openshift/openshift-mcp-server.git - cd openshift-mcp-server - podman build -t localhost/openshift-mcp-server:latest -f Dockerfile . - -2. Add openshift-virtualization to .mcp.json: +1. Add openshift-virtualization to .mcp.json: { "mcpServers": { "openshift-virtualization": { @@ -80,7 +75,7 @@ When prerequisites fail: "--userns=keep-id:uid=65532,gid=65532", "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", "--entrypoint", "/app/kubernetes-mcp-server", - "localhost/openshift-mcp-server:latest", + "quay.io/ecosystem-appeng/openshift-mcp-server:latest", "--kubeconfig", "/kubeconfig", "--toolsets", "core,kubevirt" ], @@ -91,10 +86,10 @@ When prerequisites fail: } } -3. Set KUBECONFIG environment variable: +2. Set KUBECONFIG environment variable: export KUBECONFIG="/path/to/your/kubeconfig" -4. Restart Claude Code to reload MCP servers +3. Restart Claude Code to reload MCP servers 🔗 Documentation: https://github.com/openshift/openshift-mcp-server diff --git a/rh-virt/skills/vm-lifecycle-manager/SKILL.md b/rh-virt/skills/vm-lifecycle-manager/SKILL.md index 9eccd61..89cce61 100644 --- a/rh-virt/skills/vm-lifecycle-manager/SKILL.md +++ b/rh-virt/skills/vm-lifecycle-manager/SKILL.md @@ -75,12 +75,7 @@ When prerequisites fail: ❌ Cannot execute vm-lifecycle-manager: MCP server 'openshift-virtualization' is not available 📋 Setup Instructions: -1. 
Build the OpenShift MCP server container image locally: - git clone https://github.com/openshift/openshift-mcp-server.git - cd openshift-mcp-server - podman build -t localhost/openshift-mcp-server:latest -f Dockerfile . - -2. Add openshift-virtualization to .mcp.json: +1. Add openshift-virtualization to .mcp.json: { "mcpServers": { "openshift-virtualization": { @@ -93,7 +88,7 @@ When prerequisites fail: "--userns=keep-id:uid=65532,gid=65532", "-v", "${KUBECONFIG}:/kubeconfig:ro,Z", "--entrypoint", "/app/kubernetes-mcp-server", - "localhost/openshift-mcp-server:latest", + "quay.io/ecosystem-appeng/openshift-mcp-server:latest", "--kubeconfig", "/kubeconfig", "--toolsets", "core,kubevirt" ], @@ -104,10 +99,10 @@ When prerequisites fail: } } -3. Set KUBECONFIG environment variable: +2. Set KUBECONFIG environment variable: export KUBECONFIG="/path/to/your/kubeconfig" -4. Restart Claude Code to reload MCP servers +3. Restart Claude Code to reload MCP servers 🔗 Documentation: https://github.com/openshift/openshift-mcp-server